#Apple Rhizosphere 16S File Processing with DADA2

Goals of this file

  1. Use raw fastq files and generate quality plots to assess quality of reads.
  2. Filter and trim out bad sequences and bases from our sequencing files.
  3. Write out fastq files with high quality sequences.
  4. Evaluate the quality from our filter and trim.
  5. Infer Errors on forward and reverse reads individually.
  6. Identify ASVs on forward and reverse reads separately, using the error model.
  7. Merge forward and reverse ASVs into “contiguous ASVs”.
  8. Generate the ASV count table (otu_table input for phyloseq).

Output that we need:

  1. ASV Count Table: otu_table
  2. Taxonomy Table: tax_table
  3. Sample Information: sample_data, which tracks the reads lost throughout the DADA2 workflow.

Load Libraries

#install.packages("devtools")
library("devtools")
## Loading required package: usethis
#devtools::install_github("thomasp85/patchwork@HEAD")
library(patchwork)
#install.packages("pacman")
library(pacman)
pacman::p_load(tidyverse, BiocManager, devtools, dada2, 
               phyloseq, patchwork, DT, iNEXT, vegan,
               install = TRUE)

Set Seed

# Fix the RNG state so the random sample selection below is reproducible.
# Written without the leading zero: R reads 092819 as the number 92819.
set.seed(92819)

For 2022 Data

Load Data

# Directory that holds the raw 2022 fastq files
raw_fastqs_path2 <- "data/01_DADA2/rawfastqs/data_2022"
# Fail early: list.files() returns character(0) without any error when the
# directory does not exist, which would only surface later as an obscure
# failure in the plotting/filtering steps.
stopifnot(dir.exists(raw_fastqs_path2))
raw_fastqs_path2
## [1] "data/01_DADA2/rawfastqs/data_2022"
# What files are in this path? Intuition check
head(list.files(raw_fastqs_path2))
## [1] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz" 
## [2] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R2.fastq.gz" 
## [3] "13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz" 
## [4] "13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R2.fastq.gz" 
## [5] "13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz"
## [6] "13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R2.fastq.gz"
# How many files are there?
str(list.files(raw_fastqs_path2))
##  chr [1:122] "13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz" ...

Create vector of forward reads

# Collect the forward (R1) fastq files with their full paths.
# `pattern` is a regular expression, so the dots are escaped and the match is
# anchored to the end of the file name; the unescaped, unanchored form would
# also accept names such as "sample_R1.fastq.gz.md5".
forward_reads2 <- list.files(
  raw_fastqs_path2,
  pattern = "_R1\\.fastq\\.gz$",
  full.names = TRUE
)
# Intuition check
head(forward_reads2)
## [1] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz" 
## [2] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz" 
## [3] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz"
## [4] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz"
## [5] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz"
## [6] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz"

Create a vector of reverse reads

# Collect the reverse (R2) fastq files with their full paths.
# `pattern` is a regular expression: escape the dots and anchor to the end of
# the file name so only true "_R2.fastq.gz" files are matched.
reverse_reads2 <- list.files(
  raw_fastqs_path2,
  pattern = "_R2\\.fastq\\.gz$",
  full.names = TRUE
)
# Every later step indexes forward and reverse files by position, so make sure
# the two vectors describe the same samples in the same order.
stopifnot(
  length(forward_reads2) == length(reverse_reads2),
  identical(
    sub("_R1\\.fastq\\.gz$", "", basename(forward_reads2)),
    sub("_R2\\.fastq\\.gz$", "", basename(reverse_reads2))
  )
)
head(reverse_reads2)
## [1] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R2.fastq.gz" 
## [2] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R2.fastq.gz" 
## [3] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R2.fastq.gz"
## [4] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R2.fastq.gz"
## [5] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R2.fastq.gz"
## [6] "data/01_DADA2/rawfastqs/data_2022/13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R2.fastq.gz"

Assess Raw Read Quality

Evaluate raw sequence quality prior to trimming

Plot quality profiles for 12 randomly selected samples

# Randomly select 12 samples from the dataset to evaluate.
# seq_along() avoids the 1:0 trap of 1:length(x) when no files were found, and
# the size is capped so the draw cannot fail on a run with fewer than 12
# samples. For 12 or more samples the draw is identical to the previous code.
random_samples2 <- sample(
  seq_along(reverse_reads2),
  size = min(12L, length(reverse_reads2))
)
random_samples2
##  [1] 53 26  8  7 52 54 33  1 51  9 37 40
# Quality profiles of the raw reads for the randomly selected samples
# Forward reads
forward_plot_12_2 <- forward_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Forward Read: Raw Quality 2022")

# Reverse reads (same samples, same order)
reverse_plot_12_2 <- reverse_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Reverse Read: Raw Quality 2022")

# Combine both panels into one figure with patchwork
forward_plot_12_2 + reverse_plot_12_2

Aggregated Raw Quality Plots

# One aggregated quality profile per read direction, pooling every raw file
forward_preQC_plot2 <- plotQualityProfile(fl = forward_reads2, aggregate = TRUE) +
  labs(title = "Forward Pre-QC 2022")

reverse_preQC_plot2 <- plotQualityProfile(fl = reverse_reads2, aggregate = TRUE) +
  labs(title = "Reverse Pre-QC 2022")

# Forward and reverse panels in a single patchwork figure; kept as an object
# because it is reused for the pre/post comparison later on
preQC_aggregate_plot2 <- forward_preQC_plot2 + reverse_preQC_plot2

# Show the plot
preQC_aggregate_plot2

The quality of the 2022 reads leaves a bit to be desired. We don’t want to truncate too much, but we can trim a few bases at the beginning of each read and truncate the primer at the other end. Filtering should also help the quality a bit.

Prepare a placeholder for filtered reads

# Sample names: the 7th underscore-delimited field of each forward file name
# (e.g. "B4"), tagged with the sequencing year.
# vapply() guarantees a character result (sapply() silently changes its return
# type on empty or ragged input). The checks stop the run if there are no
# files, if a file name has no 7th field, or if two files map to the same
# sample ID, which would make their filtered outputs overwrite each other.
samples2 <- local({
  stopifnot(length(forward_reads2) > 0)
  name_fields <- strsplit(basename(forward_reads2), "_", fixed = TRUE)
  sample_ids <- vapply(name_fields, `[`, character(1), 7)
  stopifnot(!anyNA(sample_ids), anyDuplicated(sample_ids) == 0L)
  paste0(sample_ids, "_2022")
})
# Intuition check
head(samples2)
## [1] "B4_2022"  "G2_2022"  "G10_2022" "G18_2022" "G26_2022" "G34_2022"
# Directory that will receive the filtered reads
filtered_fastqs_path2 <- "data/01_DADA2/02_filtered_fastqs_2022"
filtered_fastqs_path2
## [1] "data/01_DADA2/02_filtered_fastqs_2022"
# Output paths for the filtered forward reads, one per sample
filtered_forward_reads2 <- 
  file.path(filtered_fastqs_path2, paste0(samples2, "_R1_filtered_2022.fastq.gz"))
length(filtered_forward_reads2)
## [1] 59
# Output paths for the filtered reverse reads, one per sample
filtered_reverse_reads2 <- 
  file.path(filtered_fastqs_path2, paste0(samples2, "_R2_filtered_2022.fastq.gz"))
length(filtered_reverse_reads2)
## [1] 59

Filter and Trim Reads

# Quality-filter and trim every read pair, writing the surviving reads to the
# filtered paths. The returned object holds reads.in / reads.out per input file
# and is used for the retention statistics further down.
filtered_reads2 <- filterAndTrim(
  fwd = forward_reads2, filt = filtered_forward_reads2,
  rev = reverse_reads2, filt.rev = filtered_reverse_reads2,
  truncLen = c(247, 247),   # forward, reverse
  trimLeft = c(18, 21),     # forward, reverse
  maxN = 0,
  maxEE = c(2, 2),          # forward, reverse
  truncQ = 2,
  rm.phix = TRUE,
  compress = TRUE,
  multithread = TRUE
)

Assess Trimmed Read Quality

# Quality profiles of the same randomly selected samples after filtering
# Forward reads
forward_filteredQual_plot_12_2 <- filtered_forward_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Trimmed Forward Read Quality 2022")

# Reverse reads
reverse_filteredQual_plot_12_2 <- filtered_reverse_reads2[random_samples2] %>%
  plotQualityProfile() +
  labs(title = "Trimmed Reverse Read Quality 2022")

# Combine both panels into one figure with patchwork
forward_filteredQual_plot_12_2 + reverse_filteredQual_plot_12_2

Aggregated Trimmed Plots

# One aggregated quality profile per read direction, pooling every filtered file
forward_postQC_plot2 <- plotQualityProfile(fl = filtered_forward_reads2, aggregate = TRUE) +
  labs(title = "Forward Post-QC 2022")

reverse_postQC_plot2 <- plotQualityProfile(fl = filtered_reverse_reads2, aggregate = TRUE) +
  labs(title = "Reverse Post-QC 2022")

# Forward and reverse panels in a single patchwork figure; kept as an object
# because it is reused for the pre/post comparison later on
postQC_aggregate_plot2 <- forward_postQC_plot2 + reverse_postQC_plot2

# Show the plot
postQC_aggregate_plot2

Here, we see the sequences are improved from before. The very low quality that we saw in the pre-QC plots is mostly gone. There is some low quality towards the end on the reverse reads especially, but it will hopefully overlap with the good quality sections of the opposite read.

Stats on read output from filterAndTrim

# Convert the filterAndTrim() result into a data frame
# (one row per input file; columns reads.in and reads.out)
filtered_df2 <- filtered_reads2 %>% as.data.frame()
filtered_df2 %>% head()
##                                                                       reads.in
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz    204642
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz    175656
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz   138589
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz   202865
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz   202611
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz   188583
##                                                                       reads.out
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz     112436
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz     109166
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz     77962
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz    119856
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz    123024
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz    117249
# Summarise read retention across all samples.
# NOTE: median_percent_retained2 is the ratio of the two medians, expressed as
# a fraction (0-1); it is not the median of the per-sample retention rates.
filtered_df2 %>%
  summarise(
    median_reads_in2 = median(reads.in),
    median_reads_out2 = median(reads.out),
    median_percent_retained2 = median_reads_out2 / median_reads_in2
  )
##   median_reads_in2 median_reads_out2 median_percent_retained2
## 1           182525            109166                0.5980879

About 60% of the reads were retained; this seems sufficient since sequencing depth was quite high.

Visualize QC differences in plot

# Plot the pre and post together in one plot
# patchwork's `/` operator stacks the two composite figures vertically:
# pre-QC on top, post-QC below.
preQC_aggregate_plot2 / postQC_aggregate_plot2

Error Modelling

Learn the errors

# Learn the error model from the filtered forward reads
error_forward_reads2 <- 
  learnErrors(filtered_forward_reads2, multithread = 5)
## 124219676 total bases in 542444 reads from 5 samples will be used for learning the error rates.
# Plot the forward error model.
# nominalQ is a logical switch (draw the red nominal-Q reference line), so pass
# TRUE instead of relying on the number 5 being coerced to TRUE.
forward_error_plot2 <- 
  plotErrors(error_forward_reads2, nominalQ = TRUE) + 
  labs(title = "Forward Read Error Model 2022")

# Learn the error model from the filtered reverse reads
error_reverse_reads2 <- 
  learnErrors(filtered_reverse_reads2, multithread = 5)
## 122592344 total bases in 542444 reads from 5 samples will be used for learning the error rates.
# Plot the reverse error model (nominalQ = TRUE for the same reason as above)
reverse_error_plot2 <- 
  plotErrors(error_reverse_reads2, nominalQ = TRUE) + 
  labs(title = "Reverse Read Error Model 2022")

# Put the two plots together
forward_error_plot2 + reverse_error_plot2
## Warning in scale_y_log10(): log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.
## log-10 transformation introduced infinite values.

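The black lines (fitted error rates) closely follow the points (observed error rates), and the red lines (error rates expected from the nominal Q-score) have a negative slope: the error rate drops as the quality score increases.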

Infer ASVs

An important note: This process occurs separately on forward and reverse reads! This is quite a different approach from how OTUs are identified in Mothur and also from UCHIME, oligotyping, and other OTU, MED, and ASV approaches.

# Denoise the filtered forward reads into ASVs using the forward error model
dada_forward2 <- dada(
  derep = filtered_forward_reads2,
  err = error_forward_reads2,
  multithread = TRUE
)
## Sample 1 - 112436 reads in 68956 unique sequences.
## Sample 2 - 109166 reads in 49976 unique sequences.
## Sample 3 - 77962 reads in 45012 unique sequences.
## Sample 4 - 119856 reads in 67776 unique sequences.
## Sample 5 - 123024 reads in 71091 unique sequences.
## Sample 6 - 117249 reads in 56999 unique sequences.
## Sample 7 - 11991 reads in 7335 unique sequences.
## Sample 8 - 122578 reads in 71834 unique sequences.
## Sample 9 - 129923 reads in 58317 unique sequences.
## Sample 10 - 619 reads in 165 unique sequences.
## Sample 11 - 121345 reads in 64854 unique sequences.
## Sample 12 - 122765 reads in 67515 unique sequences.
## Sample 13 - 124955 reads in 71659 unique sequences.
## Sample 14 - 135315 reads in 76111 unique sequences.
## Sample 15 - 89941 reads in 53170 unique sequences.
## Sample 16 - 32169 reads in 23790 unique sequences.
## Sample 17 - 95121 reads in 48112 unique sequences.
## Sample 18 - 37661 reads in 25718 unique sequences.
## Sample 19 - 121260 reads in 51392 unique sequences.
## Sample 20 - 86351 reads in 51126 unique sequences.
## Sample 21 - 121137 reads in 69518 unique sequences.
## Sample 22 - 77310 reads in 51780 unique sequences.
## Sample 23 - 107076 reads in 58551 unique sequences.
## Sample 24 - 115796 reads in 46192 unique sequences.
## Sample 25 - 121997 reads in 54600 unique sequences.
## Sample 26 - 81300 reads in 31371 unique sequences.
## Sample 27 - 1814 reads in 1268 unique sequences.
## Sample 28 - 103325 reads in 54054 unique sequences.
## Sample 29 - 67069 reads in 42886 unique sequences.
## Sample 30 - 112150 reads in 67588 unique sequences.
## Sample 31 - 101341 reads in 49967 unique sequences.
## Sample 32 - 108028 reads in 43995 unique sequences.
## Sample 33 - 69305 reads in 37780 unique sequences.
## Sample 34 - 119851 reads in 62978 unique sequences.
## Sample 35 - 114951 reads in 72340 unique sequences.
## Sample 36 - 79183 reads in 39694 unique sequences.
## Sample 37 - 95044 reads in 54630 unique sequences.
## Sample 38 - 110978 reads in 65018 unique sequences.
## Sample 39 - 112801 reads in 57948 unique sequences.
## Sample 40 - 85656 reads in 45822 unique sequences.
## Sample 41 - 106142 reads in 43993 unique sequences.
## Sample 42 - 110276 reads in 51369 unique sequences.
## Sample 43 - 48560 reads in 39016 unique sequences.
## Sample 44 - 85342 reads in 49813 unique sequences.
## Sample 45 - 69813 reads in 35459 unique sequences.
## Sample 46 - 73527 reads in 46708 unique sequences.
## Sample 47 - 104282 reads in 43524 unique sequences.
## Sample 48 - 126156 reads in 64364 unique sequences.
## Sample 49 - 44508 reads in 32424 unique sequences.
## Sample 50 - 140122 reads in 59219 unique sequences.
## Sample 51 - 85663 reads in 61164 unique sequences.
## Sample 52 - 123554 reads in 77923 unique sequences.
## Sample 53 - 125520 reads in 65871 unique sequences.
## Sample 54 - 109189 reads in 51382 unique sequences.
## Sample 55 - 134427 reads in 68129 unique sequences.
## Sample 56 - 153905 reads in 58591 unique sequences.
## Sample 57 - 136558 reads in 80163 unique sequences.
## Sample 58 - 114312 reads in 67788 unique sequences.
## Sample 59 - 51238 reads in 32244 unique sequences.
typeof(dada_forward2)
## [1] "list"
# Grab a sample and look at it.
# Index by position, as is done for the reverse reads: the list elements are
# named after this run's filtered files, so the previous lookup by a file name
# from a different sequencing run matched nothing and returned NULL.
dada_forward2[1]
# NOTE(review): the output recorded here was "NULL" from the old lookup;
# re-run this chunk to capture the dada-class summary of the first sample.
# Denoise the filtered reverse reads into ASVs using the reverse error model
dada_reverse2 <- dada(
  derep = filtered_reverse_reads2,
  err = error_reverse_reads2,
  multithread = TRUE
)
## Sample 1 - 112436 reads in 62904 unique sequences.
## Sample 2 - 109166 reads in 43644 unique sequences.
## Sample 3 - 77962 reads in 46477 unique sequences.
## Sample 4 - 119856 reads in 62905 unique sequences.
## Sample 5 - 123024 reads in 67702 unique sequences.
## Sample 6 - 117249 reads in 49104 unique sequences.
## Sample 7 - 11991 reads in 7046 unique sequences.
## Sample 8 - 122578 reads in 65918 unique sequences.
## Sample 9 - 129923 reads in 54639 unique sequences.
## Sample 10 - 619 reads in 188 unique sequences.
## Sample 11 - 121345 reads in 57639 unique sequences.
## Sample 12 - 122765 reads in 60449 unique sequences.
## Sample 13 - 124955 reads in 75466 unique sequences.
## Sample 14 - 135315 reads in 81230 unique sequences.
## Sample 15 - 89941 reads in 45611 unique sequences.
## Sample 16 - 32169 reads in 20126 unique sequences.
## Sample 17 - 95121 reads in 38069 unique sequences.
## Sample 18 - 37661 reads in 22551 unique sequences.
## Sample 19 - 121260 reads in 49695 unique sequences.
## Sample 20 - 86351 reads in 48349 unique sequences.
## Sample 21 - 121137 reads in 60617 unique sequences.
## Sample 22 - 77310 reads in 40097 unique sequences.
## Sample 23 - 107076 reads in 52423 unique sequences.
## Sample 24 - 115796 reads in 39571 unique sequences.
## Sample 25 - 121997 reads in 50916 unique sequences.
## Sample 26 - 81300 reads in 30494 unique sequences.
## Sample 27 - 1814 reads in 1356 unique sequences.
## Sample 28 - 103325 reads in 43595 unique sequences.
## Sample 29 - 67069 reads in 35188 unique sequences.
## Sample 30 - 112150 reads in 59493 unique sequences.
## Sample 31 - 101341 reads in 50983 unique sequences.
## Sample 32 - 108028 reads in 37936 unique sequences.
## Sample 33 - 69305 reads in 26784 unique sequences.
## Sample 34 - 119851 reads in 64945 unique sequences.
## Sample 35 - 114951 reads in 67909 unique sequences.
## Sample 36 - 79183 reads in 35456 unique sequences.
## Sample 37 - 95044 reads in 48762 unique sequences.
## Sample 38 - 110978 reads in 64573 unique sequences.
## Sample 39 - 112801 reads in 52600 unique sequences.
## Sample 40 - 85656 reads in 46717 unique sequences.
## Sample 41 - 106142 reads in 39242 unique sequences.
## Sample 42 - 110276 reads in 48991 unique sequences.
## Sample 43 - 48560 reads in 31079 unique sequences.
## Sample 44 - 85342 reads in 48943 unique sequences.
## Sample 45 - 69813 reads in 27702 unique sequences.
## Sample 46 - 73527 reads in 36793 unique sequences.
## Sample 47 - 104282 reads in 54104 unique sequences.
## Sample 48 - 126156 reads in 58456 unique sequences.
## Sample 49 - 44508 reads in 27121 unique sequences.
## Sample 50 - 140122 reads in 60041 unique sequences.
## Sample 51 - 85663 reads in 51586 unique sequences.
## Sample 52 - 123554 reads in 66618 unique sequences.
## Sample 53 - 125520 reads in 54953 unique sequences.
## Sample 54 - 109189 reads in 55607 unique sequences.
## Sample 55 - 134427 reads in 62970 unique sequences.
## Sample 56 - 153905 reads in 52688 unique sequences.
## Sample 57 - 136558 reads in 69771 unique sequences.
## Sample 58 - 114312 reads in 66585 unique sequences.
## Sample 59 - 51238 reads in 30239 unique sequences.
# Inspect the denoising result of the first sample; single-bracket indexing
# keeps the element name, so the print shows which file it came from
dada_reverse2[1]
## $B4_2022_R2_filtered_2022.fastq.gz
## dada-class: object describing DADA2 denoising results
## 2290 sequence variants were inferred from 62904 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16
# Spot-check a second sample (element 30) the same way
dada_reverse2[30]
## $G6_2022_R2_filtered_2022.fastq.gz
## dada-class: object describing DADA2 denoising results
## 2616 sequence variants were inferred from 59493 input unique sequences.
## Key parameters: OMEGA_A = 1e-40, OMEGA_C = 1e-40, BAND_SIZE = 16

Merge Forward & Reverse ASVs

Now, merge the forward and reverse ASVs into contigs.

# Merge each sample's denoised forward and reverse reads into full-length
# sequences; verbose = TRUE prints the per-sample merge counts
merged_ASVs2 <- mergePairs(
  dadaF = dada_forward2,
  derepF = filtered_forward_reads2,
  dadaR = dada_reverse2,
  derepR = filtered_reverse_reads2,
  verbose = TRUE
)
## 49650 paired-reads (in 4771 unique pairings) successfully merged out of 93414 (in 31589 pairings) input.
## 70513 paired-reads (in 5531 unique pairings) successfully merged out of 97239 (in 19859 pairings) input.
## 39986 paired-reads (in 3152 unique pairings) successfully merged out of 65963 (in 18724 pairings) input.
## 56269 paired-reads (in 6117 unique pairings) successfully merged out of 99115 (in 31676 pairings) input.
## 54587 paired-reads (in 6137 unique pairings) successfully merged out of 100466 (in 33930 pairings) input.
## 66448 paired-reads (in 4772 unique pairings) successfully merged out of 101369 (in 23076 pairings) input.
## 5274 paired-reads (in 660 unique pairings) successfully merged out of 8664 (in 2466 pairings) input.
## 55043 paired-reads (in 6267 unique pairings) successfully merged out of 100011 (in 33825 pairings) input.
## 82776 paired-reads (in 7591 unique pairings) successfully merged out of 114502 (in 25264 pairings) input.
## 618 paired-reads (in 8 unique pairings) successfully merged out of 618 (in 8 pairings) input.
## 64451 paired-reads (in 6342 unique pairings) successfully merged out of 103929 (in 28741 pairings) input.
## 61765 paired-reads (in 6656 unique pairings) successfully merged out of 102457 (in 30888 pairings) input.
## 55552 paired-reads (in 5780 unique pairings) successfully merged out of 102258 (in 33984 pairings) input.
## 62643 paired-reads (in 6976 unique pairings) successfully merged out of 111672 (in 37170 pairings) input.
## 41554 paired-reads (in 4395 unique pairings) successfully merged out of 74171 (in 24234 pairings) input.
## 13615 paired-reads (in 893 unique pairings) successfully merged out of 25688 (in 7939 pairings) input.
## 61198 paired-reads (in 5069 unique pairings) successfully merged out of 84914 (in 18730 pairings) input.
## 17508 paired-reads (in 1266 unique pairings) successfully merged out of 31038 (in 9579 pairings) input.
## 82943 paired-reads (in 6255 unique pairings) successfully merged out of 109246 (in 20489 pairings) input.
## 39592 paired-reads (in 4085 unique pairings) successfully merged out of 70406 (in 22361 pairings) input.
## 60408 paired-reads (in 6170 unique pairings) successfully merged out of 102557 (in 31510 pairings) input.
## 33875 paired-reads (in 3462 unique pairings) successfully merged out of 63589 (in 22540 pairings) input.
## 53826 paired-reads (in 4966 unique pairings) successfully merged out of 89975 (in 25543 pairings) input.
## 85572 paired-reads (in 7501 unique pairings) successfully merged out of 106644 (in 18745 pairings) input.
## 80055 paired-reads (in 6355 unique pairings) successfully merged out of 109653 (in 22786 pairings) input.
## 59250 paired-reads (in 5216 unique pairings) successfully merged out of 74224 (in 13035 pairings) input.
## 1075 paired-reads (in 36 unique pairings) successfully merged out of 1352 (in 121 pairings) input.
## 62363 paired-reads (in 5711 unique pairings) successfully merged out of 92399 (in 22319 pairings) input.
## 30545 paired-reads (in 2929 unique pairings) successfully merged out of 55120 (in 18017 pairings) input.
## 50796 paired-reads (in 5103 unique pairings) successfully merged out of 92042 (in 30564 pairings) input.
## 63142 paired-reads (in 5060 unique pairings) successfully merged out of 90488 (in 20184 pairings) input.
## 75445 paired-reads (in 6064 unique pairings) successfully merged out of 98037 (in 17698 pairings) input.
## 42754 paired-reads (in 3704 unique pairings) successfully merged out of 61015 (in 14531 pairings) input.
## 62187 paired-reads (in 6507 unique pairings) successfully merged out of 101601 (in 28948 pairings) input.
## 45301 paired-reads (in 5811 unique pairings) successfully merged out of 91508 (in 34654 pairings) input.
## 46754 paired-reads (in 4188 unique pairings) successfully merged out of 67863 (in 16343 pairings) input.
## 44739 paired-reads (in 4367 unique pairings) successfully merged out of 79101 (in 24920 pairings) input.
## 49601 paired-reads (in 5039 unique pairings) successfully merged out of 90384 (in 29447 pairings) input.
## 63676 paired-reads (in 6111 unique pairings) successfully merged out of 96954 (in 25501 pairings) input.
## 43901 paired-reads (in 3798 unique pairings) successfully merged out of 71567 (in 19336 pairings) input.
## 72987 paired-reads (in 5562 unique pairings) successfully merged out of 95548 (in 17666 pairings) input.
## 67906 paired-reads (in 5937 unique pairings) successfully merged out of 97473 (in 21918 pairings) input.
## 15568 paired-reads (in 1878 unique pairings) successfully merged out of 37812 (in 16890 pairings) input.
## 41296 paired-reads (in 3835 unique pairings) successfully merged out of 70701 (in 21340 pairings) input.
## 44695 paired-reads (in 3650 unique pairings) successfully merged out of 63081 (in 13532 pairings) input.
## 36295 paired-reads (in 3069 unique pairings) successfully merged out of 62420 (in 19313 pairings) input.
## 71774 paired-reads (in 5764 unique pairings) successfully merged out of 94056 (in 18039 pairings) input.
## 70898 paired-reads (in 6675 unique pairings) successfully merged out of 109498 (in 28811 pairings) input.
## 18602 paired-reads (in 1430 unique pairings) successfully merged out of 36115 (in 12402 pairings) input.
## 95770 paired-reads (in 8678 unique pairings) successfully merged out of 128013 (in 25530 pairings) input.
## 30090 paired-reads (in 3784 unique pairings) successfully merged out of 67193 (in 28610 pairings) input.
## 50808 paired-reads (in 5754 unique pairings) successfully merged out of 100377 (in 36909 pairings) input.
## 69617 paired-reads (in 6588 unique pairings) successfully merged out of 107950 (in 28278 pairings) input.
## 64571 paired-reads (in 5401 unique pairings) successfully merged out of 95653 (in 21409 pairings) input.
## 78036 paired-reads (in 6930 unique pairings) successfully merged out of 117539 (in 29913 pairings) input.
## 107997 paired-reads (in 7429 unique pairings) successfully merged out of 140275 (in 24291 pairings) input.
## 59170 paired-reads (in 6853 unique pairings) successfully merged out of 110322 (in 38002 pairings) input.
## 50333 paired-reads (in 5647 unique pairings) successfully merged out of 93645 (in 31975 pairings) input.
## 21459 paired-reads (in 2606 unique pairings) successfully merged out of 39700 (in 13566 pairings) input.
# Evaluate the output: mergePairs() returned a list ...
typeof(merged_ASVs2)
## [1] "list"
# ... with one element per sample ...
length(merged_ASVs2)
## [1] 59
# ... named after the filtered forward-read files
names(merged_ASVs2)
##  [1] "B4_2022_R1_filtered_2022.fastq.gz"  "G2_2022_R1_filtered_2022.fastq.gz" 
##  [3] "G10_2022_R1_filtered_2022.fastq.gz" "G18_2022_R1_filtered_2022.fastq.gz"
##  [5] "G26_2022_R1_filtered_2022.fastq.gz" "G34_2022_R1_filtered_2022.fastq.gz"
##  [7] "V2_2022_R1_filtered_2022.fastq.gz"  "B5_2022_R1_filtered_2022.fastq.gz" 
##  [9] "G3_2022_R1_filtered_2022.fastq.gz"  "G11_2022_R1_filtered_2022.fastq.gz"
## [11] "G19_2022_R1_filtered_2022.fastq.gz" "G27_2022_R1_filtered_2022.fastq.gz"
## [13] "G35_2022_R1_filtered_2022.fastq.gz" "V3_2022_R1_filtered_2022.fastq.gz" 
## [15] "B6_2022_R1_filtered_2022.fastq.gz"  "G4_2022_R1_filtered_2022.fastq.gz" 
## [17] "G12_2022_R1_filtered_2022.fastq.gz" "G20_2022_R1_filtered_2022.fastq.gz"
## [19] "G28_2022_R1_filtered_2022.fastq.gz" "M1_2022_R1_filtered_2022.fastq.gz" 
## [21] "V4_2022_R1_filtered_2022.fastq.gz"  "B7_2022_R1_filtered_2022.fastq.gz" 
## [23] "G5_2022_R1_filtered_2022.fastq.gz"  "G13_2022_R1_filtered_2022.fastq.gz"
## [25] "G21_2022_R1_filtered_2022.fastq.gz" "G29_2022_R1_filtered_2022.fastq.gz"
## [27] "M2_2022_R1_filtered_2022.fastq.gz"  "V5_2022_R1_filtered_2022.fastq.gz" 
## [29] "B8_2022_R1_filtered_2022.fastq.gz"  "G6_2022_R1_filtered_2022.fastq.gz" 
## [31] "G14_2022_R1_filtered_2022.fastq.gz" "G22_2022_R1_filtered_2022.fastq.gz"
## [33] "G30_2022_R1_filtered_2022.fastq.gz" "M3_2022_R1_filtered_2022.fastq.gz" 
## [35] "S1_2022_R1_filtered_2022.fastq.gz"  "B1_2022_R1_filtered_2022.fastq.gz" 
## [37] "B9_2022_R1_filtered_2022.fastq.gz"  "G7_2022_R1_filtered_2022.fastq.gz" 
## [39] "G15_2022_R1_filtered_2022.fastq.gz" "G23_2022_R1_filtered_2022.fastq.gz"
## [41] "G31_2022_R1_filtered_2022.fastq.gz" "M4_2022_R1_filtered_2022.fastq.gz" 
## [43] "S2_2022_R1_filtered_2022.fastq.gz"  "B2_2022_R1_filtered_2022.fastq.gz" 
## [45] "B10_2022_R1_filtered_2022.fastq.gz" "G8_2022_R1_filtered_2022.fastq.gz" 
## [47] "G16_2022_R1_filtered_2022.fastq.gz" "G24_2022_R1_filtered_2022.fastq.gz"
## [49] "G32_2022_R1_filtered_2022.fastq.gz" "M5_2022_R1_filtered_2022.fastq.gz" 
## [51] "S3_2022_R1_filtered_2022.fastq.gz"  "B3_2022_R1_filtered_2022.fastq.gz" 
## [53] "G1_2022_R1_filtered_2022.fastq.gz"  "G9_2022_R1_filtered_2022.fastq.gz" 
## [55] "G17_2022_R1_filtered_2022.fastq.gz" "G25_2022_R1_filtered_2022.fastq.gz"
## [57] "G33_2022_R1_filtered_2022.fastq.gz" "V1_2022_R1_filtered_2022.fastq.gz" 
## [59] "S4_2022_R1_filtered_2022.fastq.gz"
# Inspect the merger data.frame of the third sample (G10_2022, per the names()
# listing): one row per merged sequence, with its abundance and the overlap
# statistics (nmatch, nmismatch, nindel)
head(merged_ASVs2[[3]])
##                                                                                                                                                                                                                                                                                                                                                                                                                                     sequence
## 1 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 2                     GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 3 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTGGTGTCTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 4 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 5                     GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTGGTGTCTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
##   abundance forward reverse nmatch nmismatch nindel prefer accept
## 1      2024       1       1     29         0      0      2   TRUE
## 2      1499       5       3     49         0      0      2   TRUE
## 3      1330       2       2     29         0      0      1   TRUE
## 4      1037       3       1     29         0      0      2   TRUE
## 5      1028       7       3     49         0      0      2   TRUE
## 6      1026       2       1     29         0      0      2   TRUE
# Show the full structure: one data.frame of merged sequences per sample
str(merged_ASVs2)
## List of 59
##  $ B4_2022_R1_filtered_2022.fastq.gz :'data.frame':  4771 obs. of  9 variables:
##   ..$ sequence : chr [1:4771] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:4771] 953 624 609 603 513 479 468 459 390 347 ...
##   ..$ forward  : int [1:4771] 1 3 5 2 6 8 4 7 10 13 ...
##   ..$ reverse  : int [1:4771] 1 2 4 2 9 8 5 7 6 19 ...
##   ..$ nmatch   : int [1:4771] 29 49 54 49 29 54 29 54 29 49 ...
##   ..$ nmismatch: int [1:4771] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4771] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4771] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:4771] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G2_2022_R1_filtered_2022.fastq.gz :'data.frame':  5531 obs. of  9 variables:
##   ..$ sequence : chr [1:5531] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AAGGAATATTGGTCAATGGAGGCAACTCTGAACCAGCCATGCCGCGTGCAGGAAGACTGCCCTATGGGTTGTAAACTGCTTTTATCCGGGAATAAACCACATTACGTGTAA"| __truncated__ ...
##   ..$ abundance: int [1:5531] 4162 2924 1935 1612 1573 1387 884 871 861 833 ...
##   ..$ forward  : int [1:5531] 2 1 3 5 6 1 4 9 8 11 ...
##   ..$ reverse  : int [1:5531] 1 2 4 3 1 6 2 10 1 3 ...
##   ..$ nmatch   : int [1:5531] 29 29 54 34 29 29 29 34 29 34 ...
##   ..$ nmismatch: int [1:5531] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5531] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5531] 2 1 2 2 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:5531] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G10_2022_R1_filtered_2022.fastq.gz:'data.frame':  3152 obs. of  9 variables:
##   ..$ sequence : chr [1:3152] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:3152] 2024 1499 1330 1037 1028 1026 637 601 548 320 ...
##   ..$ forward  : int [1:3152] 1 5 2 3 7 2 4 6 8 1 ...
##   ..$ reverse  : int [1:3152] 1 3 2 1 3 1 5 4 2 2 ...
##   ..$ nmatch   : int [1:3152] 29 49 29 29 49 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:3152] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3152] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3152] 2 2 1 2 2 2 1 2 2 1 ...
##   ..$ accept   : logi [1:3152] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G18_2022_R1_filtered_2022.fastq.gz:'data.frame':  6117 obs. of  9 variables:
##   ..$ sequence : chr [1:6117] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:6117] 1361 1334 1120 998 671 630 509 395 393 392 ...
##   ..$ forward  : int [1:6117] 3 4 2 6 5 1 1 10 11 7 ...
##   ..$ reverse  : int [1:6117] 3 2 1 2 5 8 7 9 10 1 ...
##   ..$ nmatch   : int [1:6117] 54 49 29 49 29 29 29 29 54 29 ...
##   ..$ nmismatch: int [1:6117] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6117] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6117] 2 2 2 2 2 1 1 2 2 2 ...
##   ..$ accept   : logi [1:6117] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G26_2022_R1_filtered_2022.fastq.gz:'data.frame':  6137 obs. of  9 variables:
##   ..$ sequence : chr [1:6137] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
##   ..$ abundance: int [1:6137] 2388 1035 754 701 457 417 410 378 377 327 ...
##   ..$ forward  : int [1:6137] 1 2 3 4 5 9 8 13 12 7 ...
##   ..$ reverse  : int [1:6137] 1 2 4 5 6 6 7 11 8 13 ...
##   ..$ nmatch   : int [1:6137] 54 29 29 29 49 49 54 54 54 54 ...
##   ..$ nmismatch: int [1:6137] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6137] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6137] 2 2 2 2 2 2 2 2 2 1 ...
##   ..$ accept   : logi [1:6137] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G34_2022_R1_filtered_2022.fastq.gz:'data.frame':  4772 obs. of  9 variables:
##   ..$ sequence : chr [1:4772] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCACGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
##   ..$ abundance: int [1:4772] 21896 1000 826 761 644 533 517 492 419 374 ...
##   ..$ forward  : int [1:4772] 1 1 2 3 4 5 6 33 7 8 ...
##   ..$ reverse  : int [1:4772] 1 4 3 1 2 3 5 1 5 3 ...
##   ..$ nmatch   : int [1:4772] 54 54 29 54 29 29 49 54 49 54 ...
##   ..$ nmismatch: int [1:4772] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4772] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4772] 2 1 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:4772] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ V2_2022_R1_filtered_2022.fastq.gz :'data.frame':  660 obs. of  9 variables:
##   ..$ sequence : chr [1:660] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
##   ..$ abundance: int [1:660] 661 142 120 107 99 73 68 60 51 51 ...
##   ..$ forward  : int [1:660] 1 2 3 4 6 197 7 8 85 104 ...
##   ..$ reverse  : int [1:660] 1 3 2 5 4 4 2 8 19 13 ...
##   ..$ nmatch   : int [1:660] 54 29 29 29 49 49 29 54 49 54 ...
##   ..$ nmismatch: int [1:660] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:660] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:660] 2 1 2 2 2 2 2 2 1 2 ...
##   ..$ accept   : logi [1:660] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B5_2022_R1_filtered_2022.fastq.gz :'data.frame':  6267 obs. of  9 variables:
##   ..$ sequence : chr [1:6267] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
##   ..$ abundance: int [1:6267] 1789 724 656 480 404 383 311 297 275 263 ...
##   ..$ forward  : int [1:6267] 1 3 2 4 1 1 6 5 13 8 ...
##   ..$ reverse  : int [1:6267] 1 3 3 4 7 5 2 2 12 8 ...
##   ..$ nmatch   : int [1:6267] 29 49 49 29 29 29 29 29 49 29 ...
##   ..$ nmismatch: int [1:6267] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6267] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6267] 1 2 2 2 1 1 2 2 2 2 ...
##   ..$ accept   : logi [1:6267] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G3_2022_R1_filtered_2022.fastq.gz :'data.frame':  7591 obs. of  9 variables:
##   ..$ sequence : chr [1:7591] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:7591] 5748 3968 1611 1277 1242 1057 834 819 786 723 ...
##   ..$ forward  : int [1:7591] 1 2 3 6 5 7 1 8 11 4 ...
##   ..$ reverse  : int [1:7591] 1 1 2 4 1 4 5 3 2 6 ...
##   ..$ nmatch   : int [1:7591] 29 29 29 49 29 49 29 29 29 29 ...
##   ..$ nmismatch: int [1:7591] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:7591] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:7591] 2 2 2 2 2 2 1 2 2 1 ...
##   ..$ accept   : logi [1:7591] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G11_2022_R1_filtered_2022.fastq.gz:'data.frame':  8 obs. of  9 variables:
##   ..$ sequence : chr [1:8] "GGGGAATTTTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTTGTCCGGAAAGAAAACTTCTGGGTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGGGGAGGAAGGGAGTAAAGTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTCGGGTTGTAAAGCCCTTTTGTCCGGAACGAAAAGCGATCGGTTAATAC"| __truncated__ "GGGGAATTTTGGACAATGGGGGCAACCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCGGCCGGAACGAAATCGCGCGGGCGAATAT"| __truncated__ ...
##   ..$ abundance: int [1:8] 263 156 77 59 35 17 8 3
##   ..$ forward  : int [1:8] 1 2 3 4 5 6 8 7
##   ..$ reverse  : int [1:8] 1 2 3 4 5 6 7 8
##   ..$ nmatch   : int [1:8] 29 29 29 29 28 49 29 53
##   ..$ nmismatch: int [1:8] 0 0 0 0 0 0 0 0
##   ..$ nindel   : int [1:8] 0 0 0 0 0 0 0 0
##   ..$ prefer   : num [1:8] 1 1 1 2 2 1 2 1
##   ..$ accept   : logi [1:8] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G19_2022_R1_filtered_2022.fastq.gz:'data.frame':  6342 obs. of  9 variables:
##   ..$ sequence : chr [1:6342] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
##   ..$ abundance: int [1:6342] 3282 1904 1778 1351 778 703 615 578 546 540 ...
##   ..$ forward  : int [1:6342] 1 3 4 2 8 7 10 6 11 9 ...
##   ..$ reverse  : int [1:6342] 1 2 1 3 4 4 1 7 1 5 ...
##   ..$ nmatch   : int [1:6342] 29 54 29 29 49 49 29 29 29 29 ...
##   ..$ nmismatch: int [1:6342] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6342] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6342] 2 2 2 1 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6342] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G27_2022_R1_filtered_2022.fastq.gz:'data.frame':  6656 obs. of  9 variables:
##   ..$ sequence : chr [1:6656] "GGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGAAGAAGGCCTTCGGGTTGTAAAGCTCTTTTGTCAGGGAAGAAACGGTGAGAGCTAATAT"| __truncated__ "GGGGAATTTTGGACAATGGGCGAAAGCCTGATCCAGCAATGCCGCGTGAGTGAAGAAGGCCTTCGGGTTGTAAAGCTCTTTTGTCAGGGAAGAAACGGTGAGAGCTAATAT"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:6656] 2958 1782 895 798 628 574 555 515 452 420 ...
##   ..$ forward  : int [1:6656] 1 2 6 5 3 4 10 7 13 14 ...
##   ..$ reverse  : int [1:6656] 1 1 3 3 4 6 11 2 12 10 ...
##   ..$ nmatch   : int [1:6656] 29 29 49 49 29 29 49 29 29 29 ...
##   ..$ nmismatch: int [1:6656] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6656] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6656] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6656] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G35_2022_R1_filtered_2022.fastq.gz:'data.frame':  5780 obs. of  9 variables:
##   ..$ sequence : chr [1:5780] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
##   ..$ abundance: int [1:5780] 2781 835 795 384 382 375 352 350 261 237 ...
##   ..$ forward  : int [1:5780] 1 3 2 5 6 7 8 9 4 13 ...
##   ..$ reverse  : int [1:5780] 1 2 2 5 3 12 4 8 6 10 ...
##   ..$ nmatch   : int [1:5780] 54 49 49 29 29 49 54 54 29 54 ...
##   ..$ nmismatch: int [1:5780] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5780] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5780] 2 2 2 1 2 1 2 2 1 2 ...
##   ..$ accept   : logi [1:5780] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ V3_2022_R1_filtered_2022.fastq.gz :'data.frame':  6976 obs. of  9 variables:
##   ..$ sequence : chr [1:6976] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAATTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:6976] 1334 1003 843 793 635 590 550 539 511 462 ...
##   ..$ forward  : int [1:6976] 1 5 6 3 2 7 2 4 8 11 ...
##   ..$ reverse  : int [1:6976] 1 4 4 2 6 7 5 5 9 8 ...
##   ..$ nmatch   : int [1:6976] 29 49 49 29 29 29 29 29 54 29 ...
##   ..$ nmismatch: int [1:6976] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6976] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6976] 2 2 2 2 1 1 1 2 2 2 ...
##   ..$ accept   : logi [1:6976] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B6_2022_R1_filtered_2022.fastq.gz :'data.frame':  4395 obs. of  9 variables:
##   ..$ sequence : chr [1:4395] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCCATGCCGCGTGAGTGATGAAGGCCCTAGGGTTGTAAAGCTCTTTCACCGGAGAAGATAATGACGGTATCCGGAG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:4395] 934 772 723 657 593 591 392 323 310 259 ...
##   ..$ forward  : int [1:4395] 2 3 1 6 5 4 1 8 9 11 ...
##   ..$ reverse  : int [1:4395] 4 3 1 2 2 6 7 5 8 9 ...
##   ..$ nmatch   : int [1:4395] 54 54 29 49 49 29 29 29 29 54 ...
##   ..$ nmismatch: int [1:4395] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4395] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4395] 2 2 2 2 2 2 1 2 2 2 ...
##   ..$ accept   : logi [1:4395] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G4_2022_R1_filtered_2022.fastq.gz :'data.frame':  893 obs. of  9 variables:
##   ..$ sequence : chr [1:893] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:893] 2032 498 278 266 255 231 209 204 194 189 ...
##   ..$ forward  : int [1:893] 1 2 81 6 3 9 7 4 10 11 ...
##   ..$ reverse  : int [1:893] 1 3 7 2 4 7 2 6 2 8 ...
##   ..$ nmatch   : int [1:893] 54 29 49 29 29 49 29 29 29 29 ...
##   ..$ nmismatch: int [1:893] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:893] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:893] 1 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:893] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G12_2022_R1_filtered_2022.fastq.gz:'data.frame':  5069 obs. of  9 variables:
##   ..$ sequence : chr [1:5069] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5069] 4267 3214 2452 1749 1196 911 822 799 789 610 ...
##   ..$ forward  : int [1:5069] 1 4 3 2 6 5 9 8 2 14 ...
##   ..$ reverse  : int [1:5069] 1 1 2 2 1 5 6 6 3 8 ...
##   ..$ nmatch   : int [1:5069] 29 29 29 29 29 29 49 49 29 34 ...
##   ..$ nmismatch: int [1:5069] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5069] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5069] 2 2 2 2 2 2 2 2 1 2 ...
##   ..$ accept   : logi [1:5069] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G20_2022_R1_filtered_2022.fastq.gz:'data.frame':  1266 obs. of  9 variables:
##   ..$ sequence : chr [1:1266] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:1266] 1203 393 374 335 300 294 290 278 202 195 ...
##   ..$ forward  : int [1:1266] 1 1 3 6 1 7 5 4 8 2 ...
##   ..$ reverse  : int [1:1266] 1 3 5 6 4 7 6 2 2 3 ...
##   ..$ nmatch   : int [1:1266] 29 29 29 49 29 54 49 29 29 29 ...
##   ..$ nmismatch: int [1:1266] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:1266] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:1266] 1 1 2 2 1 2 2 2 2 2 ...
##   ..$ accept   : logi [1:1266] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G28_2022_R1_filtered_2022.fastq.gz:'data.frame':  6255 obs. of  9 variables:
##   ..$ sequence : chr [1:6255] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACACTCCTATGTATAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:6255] 8112 4181 2354 2039 1892 1601 1567 1365 967 792 ...
##   ..$ forward  : int [1:6255] 1 2 4 5 3 6 7 8 10 13 ...
##   ..$ reverse  : int [1:6255] 1 2 1 1 3 1 1 6 4 1 ...
##   ..$ nmatch   : int [1:6255] 29 34 29 29 29 29 29 54 34 29 ...
##   ..$ nmismatch: int [1:6255] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6255] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6255] 2 2 2 2 1 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6255] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ M1_2022_R1_filtered_2022.fastq.gz :'data.frame':  4085 obs. of  9 variables:
##   ..$ sequence : chr [1:4085] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:4085] 1360 806 637 467 310 310 306 266 259 253 ...
##   ..$ forward  : int [1:4085] 1 2 3 4 7 6 10 43 8 14 ...
##   ..$ reverse  : int [1:4085] 1 2 2 5 6 4 7 28 3 10 ...
##   ..$ nmatch   : int [1:4085] 54 49 49 49 54 54 29 49 29 54 ...
##   ..$ nmismatch: int [1:4085] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4085] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4085] 2 2 2 2 2 2 2 1 2 2 ...
##   ..$ accept   : logi [1:4085] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ V4_2022_R1_filtered_2022.fastq.gz :'data.frame':  6170 obs. of  9 variables:
##   ..$ sequence : chr [1:6170] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAACTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:6170] 1490 1483 1324 1089 1031 1030 683 643 570 567 ...
##   ..$ forward  : int [1:6170] 2 1 4 5 7 3 8 6 9 10 ...
##   ..$ reverse  : int [1:6170] 1 2 2 3 3 4 1 7 1 6 ...
##   ..$ nmatch   : int [1:6170] 29 29 29 49 49 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:6170] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6170] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6170] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6170] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B7_2022_R1_filtered_2022.fastq.gz :'data.frame':  3462 obs. of  9 variables:
##   ..$ sequence : chr [1:3462] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTTACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:3462] 858 724 659 357 329 282 259 228 215 174 ...
##   ..$ forward  : int [1:3462] 1 2 3 5 6 4 9 15 10 14 ...
##   ..$ reverse  : int [1:3462] 2 1 1 4 3 5 8 17 9 7 ...
##   ..$ nmatch   : int [1:3462] 54 49 49 29 29 29 29 49 29 54 ...
##   ..$ nmismatch: int [1:3462] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3462] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3462] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:3462] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G5_2022_R1_filtered_2022.fastq.gz :'data.frame':  4966 obs. of  9 variables:
##   ..$ sequence : chr [1:4966] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
##   ..$ abundance: int [1:4966] 4058 3218 1626 513 482 360 355 335 312 278 ...
##   ..$ forward  : int [1:4966] 1 2 3 5 6 7 11 9 4 12 ...
##   ..$ reverse  : int [1:4966] 1 1 2 7 3 5 9 8 6 10 ...
##   ..$ nmatch   : int [1:4966] 49 49 54 29 29 29 54 29 29 29 ...
##   ..$ nmismatch: int [1:4966] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4966] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4966] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:4966] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G13_2022_R1_filtered_2022.fastq.gz:'data.frame':  7501 obs. of  9 variables:
##   ..$ sequence : chr [1:7501] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACATCCCGACGTGTCG"| __truncated__ "GAGGAATATTGGTCAATGGACGCAAGTCTGAACCAGCCATGCCGCGTGCAGGATGACGGTCCTATGGATTGTAAACTGCTTTTGTACGAGAAGAAACACTCCTATGTATAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:7501] 4336 2491 1867 1750 1604 1377 1158 1154 1137 1027 ...
##   ..$ forward  : int [1:7501] 1 3 8 5 6 2 12 2 4 13 ...
##   ..$ reverse  : int [1:7501] 2 3 3 1 1 4 10 6 5 9 ...
##   ..$ nmatch   : int [1:7501] 29 34 34 29 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:7501] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:7501] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:7501] 2 2 2 2 2 1 2 1 2 2 ...
##   ..$ accept   : logi [1:7501] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G21_2022_R1_filtered_2022.fastq.gz:'data.frame':  6355 obs. of  9 variables:
##   ..$ sequence : chr [1:6355] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGGTCGCTAATAA"| __truncated__ "GAGGAATATTGGACAATGGGTGAGAGCCTGATCCAGCCATCCCGCGTGAAGGACGACGGCCCTATGGGTTGTAAACTTCTTTTGTATAGGGATAAACCTACTCTCGTGAGA"| __truncated__ ...
##   ..$ abundance: int [1:6355] 5440 2601 2554 2229 1974 1491 1434 1409 1263 1211 ...
##   ..$ forward  : int [1:6355] 1 3 4 5 6 7 2 9 11 2 ...
##   ..$ reverse  : int [1:6355] 1 1 1 2 1 4 3 1 1 5 ...
##   ..$ nmatch   : int [1:6355] 29 29 29 34 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:6355] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6355] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6355] 2 2 2 2 2 2 1 2 2 1 ...
##   ..$ accept   : logi [1:6355] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G29_2022_R1_filtered_2022.fastq.gz:'data.frame':  5216 obs. of  9 variables:
##   ..$ sequence : chr [1:5216] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:5216] 4003 2658 1509 1201 1091 900 824 786 781 771 ...
##   ..$ forward  : int [1:5216] 1 2 5 6 2 10 4 13 7 3 ...
##   ..$ reverse  : int [1:5216] 1 2 4 4 6 3 7 3 5 6 ...
##   ..$ nmatch   : int [1:5216] 29 29 49 49 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:5216] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5216] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5216] 1 1 2 2 1 2 1 2 2 2 ...
##   ..$ accept   : logi [1:5216] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ M2_2022_R1_filtered_2022.fastq.gz :'data.frame':  36 obs. of  9 variables:
##   ..$ sequence : chr [1:36] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:36] 375 95 86 79 57 52 43 34 30 28 ...
##   ..$ forward  : int [1:36] 1 2 1 2 1 7 2 10 7 3 ...
##   ..$ reverse  : int [1:36] 1 2 2 1 11 2 11 3 1 4 ...
##   ..$ nmatch   : int [1:36] 29 29 29 29 29 29 29 29 29 54 ...
##   ..$ nmismatch: int [1:36] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:36] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:36] 1 1 1 2 1 2 1 2 2 1 ...
##   ..$ accept   : logi [1:36] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ V5_2022_R1_filtered_2022.fastq.gz :'data.frame':  5711 obs. of  9 variables:
##   ..$ sequence : chr [1:5711] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCACAATGGACGAAAGTCTGATGGAGCAACGCCGCGTGAGCGATGAAGGCCTTCGGGTCGTAAAGCTCTGTTGTTAGGGAAGAACAAGTACCGGAGTAACT"| __truncated__ ...
##   ..$ abundance: int [1:5711] 10284 1388 1370 995 714 645 644 547 459 424 ...
##   ..$ forward  : int [1:5711] 1 4 3 5 7 2 2 8 1 9 ...
##   ..$ reverse  : int [1:5711] 1 3 3 5 2 2 7 6 10 2 ...
##   ..$ nmatch   : int [1:5711] 54 49 49 29 29 29 29 29 54 29 ...
##   ..$ nmismatch: int [1:5711] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5711] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5711] 2 2 2 2 2 2 1 2 1 2 ...
##   ..$ accept   : logi [1:5711] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B8_2022_R1_filtered_2022.fastq.gz :'data.frame':  2929 obs. of  9 variables:
##   ..$ sequence : chr [1:2929] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:2929] 1026 729 640 512 467 456 281 276 255 171 ...
##   ..$ forward  : int [1:2929] 1 3 2 6 5 4 8 10 9 14 ...
##   ..$ reverse  : int [1:2929] 2 1 4 3 5 3 1 6 9 17 ...
##   ..$ nmatch   : int [1:2929] 54 29 29 49 54 49 29 49 53 49 ...
##   ..$ nmismatch: int [1:2929] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:2929] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:2929] 2 2 2 2 2 2 2 2 2 1 ...
##   ..$ accept   : logi [1:2929] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G6_2022_R1_filtered_2022.fastq.gz :'data.frame':  5103 obs. of  9 variables:
##   ..$ sequence : chr [1:5103] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5103] 1538 974 782 735 494 457 318 298 280 273 ...
##   ..$ forward  : int [1:5103] 1 3 5 2 4 6 7 4 13 9 ...
##   ..$ reverse  : int [1:5103] 1 2 2 3 5 3 4 8 9 6 ...
##   ..$ nmatch   : int [1:5103] 54 49 49 29 29 29 29 29 51 54 ...
##   ..$ nmismatch: int [1:5103] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5103] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5103] 2 2 2 2 1 2 2 1 2 2 ...
##   ..$ accept   : logi [1:5103] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G14_2022_R1_filtered_2022.fastq.gz:'data.frame':  5060 obs. of  9 variables:
##   ..$ sequence : chr [1:5060] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGATAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5060] 4149 2483 2182 1039 1022 878 670 660 489 422 ...
##   ..$ forward  : int [1:5060] 1 2 3 4 8 9 6 5 2 5 ...
##   ..$ reverse  : int [1:5060] 1 1 2 1 2 4 7 6 5 1 ...
##   ..$ nmatch   : int [1:5060] 29 29 49 29 49 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:5060] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5060] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5060] 2 2 2 2 2 2 1 1 1 2 ...
##   ..$ accept   : logi [1:5060] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G22_2022_R1_filtered_2022.fastq.gz:'data.frame':  6064 obs. of  9 variables:
##   ..$ sequence : chr [1:6064] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGGTCGCTAATAA"| __truncated__ ...
##   ..$ abundance: int [1:6064] 3352 2656 1589 1479 1286 1254 1213 1200 1108 1034 ...
##   ..$ forward  : int [1:6064] 1 2 4 7 6 1 5 8 10 9 ...
##   ..$ reverse  : int [1:6064] 3 2 1 1 4 2 6 7 1 4 ...
##   ..$ nmatch   : int [1:6064] 29 29 29 29 34 29 34 34 29 34 ...
##   ..$ nmismatch: int [1:6064] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6064] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6064] 1 2 2 2 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:6064] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G30_2022_R1_filtered_2022.fastq.gz:'data.frame':  3704 obs. of  9 variables:
##   ..$ sequence : chr [1:3704] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
##   ..$ abundance: int [1:3704] 3368 1298 1263 1004 958 951 895 850 809 746 ...
##   ..$ forward  : int [1:3704] 1 4 3 7 5 6 2 10 2 9 ...
##   ..$ reverse  : int [1:3704] 1 2 3 8 2 5 4 6 1 6 ...
##   ..$ nmatch   : int [1:3704] 29 29 29 54 29 29 29 49 29 49 ...
##   ..$ nmismatch: int [1:3704] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3704] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3704] 2 2 2 2 2 2 1 2 2 2 ...
##   ..$ accept   : logi [1:3704] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ M3_2022_R1_filtered_2022.fastq.gz :'data.frame':  6507 obs. of  9 variables:
##   ..$ sequence : chr [1:6507] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:6507] 2302 1632 1506 1392 1130 842 831 711 642 556 ...
##   ..$ forward  : int [1:6507] 1 3 4 5 7 2 8 6 2 10 ...
##   ..$ reverse  : int [1:6507] 2 1 3 3 6 4 1 7 5 9 ...
##   ..$ nmatch   : int [1:6507] 29 29 49 49 54 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:6507] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6507] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6507] 1 2 2 2 2 1 2 1 1 2 ...
##   ..$ accept   : logi [1:6507] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ S1_2022_R1_filtered_2022.fastq.gz :'data.frame':  5811 obs. of  9 variables:
##   ..$ sequence : chr [1:5811] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
##   ..$ abundance: int [1:5811] 830 730 608 363 264 257 245 218 186 173 ...
##   ..$ forward  : int [1:5811] 2 1 3 5 12 11 6 9 13 16 ...
##   ..$ reverse  : int [1:5811] 1 1 2 3 27 7 2 3 4 5 ...
##   ..$ nmatch   : int [1:5811] 49 49 29 54 48 49 29 54 37 54 ...
##   ..$ nmismatch: int [1:5811] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5811] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5811] 2 2 2 2 1 2 2 2 2 2 ...
##   ..$ accept   : logi [1:5811] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B1_2022_R1_filtered_2022.fastq.gz :'data.frame':  4188 obs. of  9 variables:
##   ..$ sequence : chr [1:4188] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:4188] 3221 1425 1224 735 643 599 599 589 473 468 ...
##   ..$ forward  : int [1:4188] 1 2 4 3 8 6 5 7 11 3 ...
##   ..$ reverse  : int [1:4188] 1 3 2 1 6 2 5 6 2 4 ...
##   ..$ nmatch   : int [1:4188] 29 29 29 29 49 29 29 49 29 29 ...
##   ..$ nmismatch: int [1:4188] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4188] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4188] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:4188] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B9_2022_R1_filtered_2022.fastq.gz :'data.frame':  4367 obs. of  9 variables:
##   ..$ sequence : chr [1:4367] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:4367] 1322 1248 1080 602 385 346 315 297 291 246 ...
##   ..$ forward  : int [1:4367] 2 1 3 5 7 8 1 4 1 15 ...
##   ..$ reverse  : int [1:4367] 1 2 3 4 4 10 9 8 5 11 ...
##   ..$ nmatch   : int [1:4367] 29 29 54 49 49 49 29 29 29 51 ...
##   ..$ nmismatch: int [1:4367] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:4367] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:4367] 2 1 2 2 2 2 1 1 1 2 ...
##   ..$ accept   : logi [1:4367] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G7_2022_R1_filtered_2022.fastq.gz :'data.frame':  5039 obs. of  9 variables:
##   ..$ sequence : chr [1:5039] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:5039] 1900 875 688 388 382 378 332 279 263 234 ...
##   ..$ forward  : int [1:5039] 1 2 3 6 5 4 7 11 10 12 ...
##   ..$ reverse  : int [1:5039] 1 2 2 8 3 4 5 10 6 12 ...
##   ..$ nmatch   : int [1:5039] 54 49 49 49 29 29 29 54 54 29 ...
##   ..$ nmismatch: int [1:5039] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5039] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5039] 2 2 2 1 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:5039] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G15_2022_R1_filtered_2022.fastq.gz:'data.frame':  6111 obs. of  9 variables:
##   ..$ sequence : chr [1:6111] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGTAGGGAGGAAAGGGTGTAACTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:6111] 4167 3071 1463 1314 1020 980 845 727 705 542 ...
##   ..$ forward  : int [1:6111] 1 2 3 5 6 8 7 4 1 12 ...
##   ..$ reverse  : int [1:6111] 1 4 3 2 3 2 5 7 8 9 ...
##   ..$ nmatch   : int [1:6111] 29 54 29 29 29 29 29 29 29 49 ...
##   ..$ nmismatch: int [1:6111] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6111] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6111] 2 2 2 2 2 2 1 1 1 2 ...
##   ..$ accept   : logi [1:6111] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G23_2022_R1_filtered_2022.fastq.gz:'data.frame':  3798 obs. of  9 variables:
##   ..$ sequence : chr [1:3798] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:3798] 4981 775 769 623 587 557 418 410 375 350 ...
##   ..$ forward  : int [1:3798] 1 2 4 5 6 3 9 8 7 11 ...
##   ..$ reverse  : int [1:3798] 1 2 3 2 3 4 6 2 5 2 ...
##   ..$ nmatch   : int [1:3798] 54 29 49 29 49 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:3798] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3798] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3798] 2 2 2 2 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:3798] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G31_2022_R1_filtered_2022.fastq.gz:'data.frame':  5562 obs. of  9 variables:
##   ..$ sequence : chr [1:5562] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5562] 4691 2408 2362 2322 1410 1051 870 849 839 793 ...
##   ..$ forward  : int [1:5562] 1 3 2 1 1 8 10 2 1 4 ...
##   ..$ reverse  : int [1:5562] 1 3 2 4 2 6 3 1 5 5 ...
##   ..$ nmatch   : int [1:5562] 29 29 29 29 29 34 29 29 29 29 ...
##   ..$ nmismatch: int [1:5562] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5562] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5562] 1 2 2 1 1 2 2 2 1 2 ...
##   ..$ accept   : logi [1:5562] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ M4_2022_R1_filtered_2022.fastq.gz :'data.frame':  5937 obs. of  9 variables:
##   ..$ sequence : chr [1:5937] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "AGGGAATATTGGACAATGGGCGCAAGCCTGATCCAGCAATGCCGCGTGAGTGATGAAGGCCTTAGGGTTGTAAAGCTCTTTTACCCGAGATGATAATGACAGTATCGGGAG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5937] 7270 2414 1867 1525 1442 1294 1208 802 796 686 ...
##   ..$ forward  : int [1:5937] 1 3 4 6 5 2 7 2 8 9 ...
##   ..$ reverse  : int [1:5937] 1 1 2 1 4 3 1 1 1 1 ...
##   ..$ nmatch   : int [1:5937] 29 29 54 29 54 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:5937] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5937] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5937] 2 2 2 2 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:5937] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ S2_2022_R1_filtered_2022.fastq.gz :'data.frame':  1878 obs. of  9 variables:
##   ..$ sequence : chr [1:1878] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
##   ..$ abundance: int [1:1878] 427 341 341 228 155 149 144 126 103 94 ...
##   ..$ forward  : int [1:1878] 1 2 13 3 4 5 8 42 220 6 ...
##   ..$ reverse  : int [1:1878] 1 2 1 3 2 4 7 610 3 11 ...
##   ..$ nmatch   : int [1:1878] 49 29 49 54 29 54 49 54 54 37 ...
##   ..$ nmismatch: int [1:1878] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:1878] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:1878] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:1878] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B2_2022_R1_filtered_2022.fastq.gz :'data.frame':  3835 obs. of  9 variables:
##   ..$ sequence : chr [1:3835] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:3835] 1442 985 685 512 507 497 490 437 408 286 ...
##   ..$ forward  : int [1:3835] 2 1 3 7 4 5 6 11 10 15 ...
##   ..$ reverse  : int [1:3835] 1 2 4 3 1 3 5 7 6 1 ...
##   ..$ nmatch   : int [1:3835] 29 29 54 49 29 49 54 49 29 29 ...
##   ..$ nmismatch: int [1:3835] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3835] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3835] 2 1 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:3835] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B10_2022_R1_filtered_2022.fastq.gz:'data.frame':  3650 obs. of  9 variables:
##   ..$ sequence : chr [1:3650] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGATAATAC"| __truncated__ ...
##   ..$ abundance: int [1:3650] 3851 3030 2261 1054 949 693 655 606 415 391 ...
##   ..$ forward  : int [1:3650] 1 2 3 4 6 7 5 10 9 49 ...
##   ..$ reverse  : int [1:3650] 1 2 2 1 3 4 1 7 6 2 ...
##   ..$ nmatch   : int [1:3650] 29 49 49 29 29 29 29 49 29 49 ...
##   ..$ nmismatch: int [1:3650] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3650] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3650] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:3650] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G8_2022_R1_filtered_2022.fastq.gz :'data.frame':  3069 obs. of  9 variables:
##   ..$ sequence : chr [1:3069] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:3069] 1055 1029 1014 947 609 491 435 381 361 331 ...
##   ..$ forward  : int [1:3069] 2 1 3 4 6 5 8 10 11 9 ...
##   ..$ reverse  : int [1:3069] 4 2 3 3 1 2 1 1 1 6 ...
##   ..$ nmatch   : int [1:3069] 29 29 49 49 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:3069] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3069] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3069] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:3069] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G16_2022_R1_filtered_2022.fastq.gz:'data.frame':  5764 obs. of  9 variables:
##   ..$ sequence : chr [1:5764] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GAGGAATATTGGACAATGGGTGAGAGCCTGATCCAGCCATCCCGCGTGAAGGACTAAGGCCCTATGGGTTGTAAACTTCTTTTATACTGGGATAAACCTACTTACGTGTAA"| __truncated__ ...
##   ..$ abundance: int [1:5764] 4317 2800 1592 1329 1324 1254 1186 1096 944 932 ...
##   ..$ forward  : int [1:5764] 2 3 1 8 6 11 5 12 1 1 ...
##   ..$ reverse  : int [1:5764] 1 1 2 6 1 1 7 1 4 9 ...
##   ..$ nmatch   : int [1:5764] 29 29 29 34 29 29 34 29 29 29 ...
##   ..$ nmismatch: int [1:5764] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5764] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5764] 2 2 1 2 2 2 1 2 1 1 ...
##   ..$ accept   : logi [1:5764] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G24_2022_R1_filtered_2022.fastq.gz:'data.frame':  6675 obs. of  9 variables:
##   ..$ sequence : chr [1:6675] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGTAGGGAGGAAAGGGTGTAACTTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
##   ..$ abundance: int [1:6675] 4489 3076 1285 1218 1108 1053 1032 849 772 765 ...
##   ..$ forward  : int [1:6675] 1 2 3 7 4 8 6 9 5 10 ...
##   ..$ reverse  : int [1:6675] 1 3 2 6 5 8 2 4 7 4 ...
##   ..$ nmatch   : int [1:6675] 29 29 29 54 29 49 29 49 29 49 ...
##   ..$ nmismatch: int [1:6675] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6675] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6675] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6675] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G32_2022_R1_filtered_2022.fastq.gz:'data.frame':  1430 obs. of  9 variables:
##   ..$ sequence : chr [1:1430] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGCAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:1430] 1102 769 601 355 349 328 237 175 150 148 ...
##   ..$ forward  : int [1:1430] 1 2 4 9 5 3 6 12 11 8 ...
##   ..$ reverse  : int [1:1430] 1 1 5 8 3 2 2 6 2 4 ...
##   ..$ nmatch   : int [1:1430] 49 49 54 49 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:1430] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:1430] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:1430] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:1430] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ M5_2022_R1_filtered_2022.fastq.gz :'data.frame':  8678 obs. of  9 variables:
##   ..$ sequence : chr [1:8678] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAAGCGAATAC"| __truncated__ ...
##   ..$ abundance: int [1:8678] 3929 3927 3927 3264 3188 1449 1172 887 883 794 ...
##   ..$ forward  : int [1:8678] 4 3 5 1 2 6 1 8 1 9 ...
##   ..$ reverse  : int [1:8678] 2 1 2 3 1 4 1 5 6 5 ...
##   ..$ nmatch   : int [1:8678] 49 29 49 29 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:8678] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:8678] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:8678] 2 2 2 1 2 2 2 2 1 2 ...
##   ..$ accept   : logi [1:8678] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ S3_2022_R1_filtered_2022.fastq.gz :'data.frame':  3784 obs. of  9 variables:
##   ..$ sequence : chr [1:3784] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ ...
##   ..$ abundance: int [1:3784] 487 472 377 286 239 154 150 145 137 137 ...
##   ..$ forward  : int [1:3784] 1 3 2 4 7 14 8 15 16 11 ...
##   ..$ reverse  : int [1:3784] 1 1 2 3 3 4 2 19 8 6 ...
##   ..$ nmatch   : int [1:3784] 49 49 29 54 54 51 29 49 29 54 ...
##   ..$ nmismatch: int [1:3784] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:3784] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:3784] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:3784] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ B3_2022_R1_filtered_2022.fastq.gz :'data.frame':  5754 obs. of  9 variables:
##   ..$ sequence : chr [1:5754] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ ...
##   ..$ abundance: int [1:5754] 731 651 615 427 420 373 324 298 273 262 ...
##   ..$ forward  : int [1:5754] 2 3 1 5 4 9 7 6 10 13 ...
##   ..$ reverse  : int [1:5754] 1 1 3 5 7 4 2 2 9 6 ...
##   ..$ nmatch   : int [1:5754] 49 49 29 54 54 51 29 29 54 54 ...
##   ..$ nmismatch: int [1:5754] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5754] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5754] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:5754] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G1_2022_R1_filtered_2022.fastq.gz :'data.frame':  6588 obs. of  9 variables:
##   ..$ sequence : chr [1:6588] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:6588] 5001 1831 1472 1400 1055 1015 953 953 888 780 ...
##   ..$ forward  : int [1:6588] 1 2 5 6 9 10 3 8 11 7 ...
##   ..$ reverse  : int [1:6588] 1 1 6 3 1 3 5 2 1 7 ...
##   ..$ nmatch   : int [1:6588] 29 29 54 49 29 49 29 29 29 29 ...
##   ..$ nmismatch: int [1:6588] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6588] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6588] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6588] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G9_2022_R1_filtered_2022.fastq.gz :'data.frame':  5401 obs. of  9 variables:
##   ..$ sequence : chr [1:5401] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:5401] 11407 2544 1160 986 945 569 517 505 500 471 ...
##   ..$ forward  : int [1:5401] 1 2 5 6 4 13 1 10 3 11 ...
##   ..$ reverse  : int [1:5401] 1 2 3 3 4 9 10 3 5 3 ...
##   ..$ nmatch   : int [1:5401] 54 29 29 29 29 49 54 29 29 29 ...
##   ..$ nmismatch: int [1:5401] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5401] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5401] 1 2 2 2 1 2 1 2 1 2 ...
##   ..$ accept   : logi [1:5401] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G17_2022_R1_filtered_2022.fastq.gz:'data.frame':  6930 obs. of  9 variables:
##   ..$ sequence : chr [1:6930] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTGGTTGGCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCAGTAACTTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:6930] 6375 2959 1840 1264 1216 1042 893 865 795 707 ...
##   ..$ forward  : int [1:6930] 1 2 4 5 3 6 10 7 11 8 ...
##   ..$ reverse  : int [1:6930] 2 1 1 3 3 6 1 5 1 3 ...
##   ..$ nmatch   : int [1:6930] 54 29 29 29 29 29 29 29 29 29 ...
##   ..$ nmismatch: int [1:6930] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6930] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6930] 2 2 2 2 2 1 2 2 2 2 ...
##   ..$ accept   : logi [1:6930] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G25_2022_R1_filtered_2022.fastq.gz:'data.frame':  7429 obs. of  9 variables:
##   ..$ sequence : chr [1:7429] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCCATTACCTAATAC"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:7429] 19022 3456 3078 3048 2755 2098 1871 1489 1452 1050 ...
##   ..$ forward  : int [1:7429] 1 2 1 3 2 4 1 3 2 6 ...
##   ..$ reverse  : int [1:7429] 1 2 2 2 1 3 4 1 6 7 ...
##   ..$ nmatch   : int [1:7429] 29 29 29 29 29 54 29 29 29 34 ...
##   ..$ nmismatch: int [1:7429] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:7429] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:7429] 2 2 1 2 2 2 1 2 1 2 ...
##   ..$ accept   : logi [1:7429] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ G33_2022_R1_filtered_2022.fastq.gz:'data.frame':  6853 obs. of  9 variables:
##   ..$ sequence : chr [1:6853] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGG"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATAC"| __truncated__ ...
##   ..$ abundance: int [1:6853] 1539 1375 1125 567 497 442 440 436 353 341 ...
##   ..$ forward  : int [1:6853] 1 2 3 4 6 9 7 5 8 10 ...
##   ..$ reverse  : int [1:6853] 1 1 3 4 2 8 7 5 6 12 ...
##   ..$ nmatch   : int [1:6853] 49 49 54 29 29 49 29 29 29 54 ...
##   ..$ nmismatch: int [1:6853] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:6853] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:6853] 2 2 2 2 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:6853] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ V1_2022_R1_filtered_2022.fastq.gz :'data.frame':  5647 obs. of  9 variables:
##   ..$ sequence : chr [1:5647] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATAC"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCCATGCCGCGTGTATGAAGAAGGCCTTCGGGTTGTAAAGTACTTTCAGCGAGGAGGAAGGCATTGTGGTTAATAA"| __truncated__ ...
##   ..$ abundance: int [1:5647] 1970 937 770 744 693 665 593 490 453 421 ...
##   ..$ forward  : int [1:5647] 1 4 5 3 7 6 2 8 11 9 ...
##   ..$ reverse  : int [1:5647] 2 3 1 5 3 1 4 7 11 1 ...
##   ..$ nmatch   : int [1:5647] 29 49 29 29 49 29 29 54 54 29 ...
##   ..$ nmismatch: int [1:5647] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:5647] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:5647] 2 2 2 1 2 2 2 2 2 2 ...
##   ..$ accept   : logi [1:5647] TRUE TRUE TRUE TRUE TRUE TRUE ...
##  $ S4_2022_R1_filtered_2022.fastq.gz :'data.frame':  2606 obs. of  9 variables:
##   ..$ sequence : chr [1:2606] "AGGGAATCTTCCGCAATGGACGAAAGTCTGACGGAGCAACGCCGCGTGAGTGATGAAGGCTTTCGGGTCGTAAAACTCTGTTGTTAGGGAAGAACAAGTACGAGAGTAACT"| __truncated__ "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGGGTGAAGAAGGCCTTAGGGTTGTAAACCCCTTTCAGCGGGGAAGATAATGACGGTACCCGCAG"| __truncated__ "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACC"| __truncated__ ...
##   ..$ abundance: int [1:2606] 265 231 226 213 162 153 129 127 126 125 ...
##   ..$ forward  : int [1:2606] 1 2 3 234 11 6 19 5 18 86 ...
##   ..$ reverse  : int [1:2606] 2 1 3 3 24 1 18 7 4 2 ...
##   ..$ nmatch   : int [1:2606] 29 54 49 49 49 54 48 54 51 29 ...
##   ..$ nmismatch: int [1:2606] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ nindel   : int [1:2606] 0 0 0 0 0 0 0 0 0 0 ...
##   ..$ prefer   : num [1:2606] 2 2 2 2 1 2 2 2 2 2 ...
##   ..$ accept   : logi [1:2606] TRUE TRUE TRUE TRUE TRUE TRUE ...

Create Raw ASV Count Table

# Build the ASV count matrix from the merged read pairs
# (one row per sample, one column per merged ASV sequence; see dim() below).
raw_ASV_table2 <- makeSequenceTable(merged_ASVs2)

# Path intended for the raw ASV table under data/01_DADA2.
# NOTE(review): only the path is defined here; no write call using it is visible in this section.

raw_ASV_table2_path <- "data/01_DADA2/raw_ASV_table2"

# Check the type and dimensions of the data
dim(raw_ASV_table2)
## [1]     59 169619
class(raw_ASV_table2)
## [1] "matrix" "array"
typeof(raw_ASV_table2)
## [1] "integer"
# Tabulate ASV sequence lengths (bp) across the whole dataset, before any length trimming
table(nchar(getSequences(raw_ASV_table2)))
## 
##   242   245   247   248   253   255   256   259   265   266   267   275   280 
##     1     1     1     1     1     1     1     4     1     4     1     1     1 
##   295   314   315   316   318   319   322   325   332   336   340   342   344 
##     1     1     1     1     1     1     1     3     1     3     1     7     1 
##   348   354   355   357   358   359   362   363   364   366   367   369   370 
##     1     1     4     6     3     1     1     3     1     1     1    14     1 
##   371   372   373   376   377   379   381   382   383   385   386   388   389 
##     1     2     5     2     1    13     1     7     8     3     3     3     8 
##   390   391   393   394   395   396   398   399   400   401   402   403   404 
##     2     1     1     1     5     2     6    42   336 21390  4103  1477  1435 
##   405   406   407   408   409   410   411   412   413   414   415   416   417 
##   301 15705   672   612   399   492    82   345   127   241   490   274   135 
##   418   419   420   421   422   423   424   425   426   427   428   429   430 
##   918   869   744  6034   393  1571  1493 14909 77420 15592   742    34    15 
##   431   432   433   434   435   436   437   438   442   443 
##     1     3     5     1     3    19     3    12     3    22
# Histogram of ASV sequence lengths in the raw (untrimmed) ASV table,
# i.e. BEFORE the length trim applied further below.
ggplot(
  data = data.frame(Seq_Length = nchar(getSequences(raw_ASV_table2))),
  mapping = aes(x = Seq_Length)
) +
  geom_histogram() +
  labs(title = "Raw distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

TRIM THE ASVS

Let’s trim the ASVs to only be the right size, which is somewhere in the neighborhood of 400 bp (they come from the V3–V4 region, whose length can vary depending on the taxon).

# Keep only ASVs whose length is between 350 and 450 bp, which allows for some
# length variation around the ~400-430 bp lengths that dominate the table above.
# drop = FALSE keeps the result a matrix even if only one ASV (column) passes
# the filter; without it R would collapse the table to a plain vector and the
# downstream matrix-based steps would fail.
raw_ASV_table_trimmed2 <- raw_ASV_table2[
  ,
  nchar(colnames(raw_ASV_table2)) %in% 350:450,
  drop = FALSE
]

# Tabulate the ASV sequence lengths that remain after the length filter
table(nchar(getSequences(raw_ASV_table_trimmed2)))
## 
##   354   355   357   358   359   362   363   364   366   367   369   370   371 
##     1     4     6     3     1     1     3     1     1     1    14     1     1 
##   372   373   376   377   379   381   382   383   385   386   388   389   390 
##     2     5     2     1    13     1     7     8     3     3     3     8     2 
##   391   393   394   395   396   398   399   400   401   402   403   404   405 
##     1     1     1     5     2     6    42   336 21390  4103  1477  1435   301 
##   406   407   408   409   410   411   412   413   414   415   416   417   418 
## 15705   672   612   399   492    82   345   127   241   490   274   135   918 
##   419   420   421   422   423   424   425   426   427   428   429   430   431 
##   869   744  6034   393  1571  1493 14909 77420 15592   742    34    15     1 
##   432   433   434   435   436   437   438   442   443 
##     3     5     1     3    19     3    12     3    22
# Fraction of all read counts still present after the length filter
sum(raw_ASV_table_trimmed2) / sum(raw_ASV_table2)
## [1] 0.9999665
# Histogram of ASV sequence lengths AFTER the length trim
ggplot(
  data = data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed2))),
  mapping = aes(x = Seq_Length)
) +
  geom_histogram() +
  labs(title = "Trimmed distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Note: the most common lengths (e.g. 426 bp with 77,420 ASVs in the length
# table above) dwarf everything else, so rarer lengths are invisible at full scale.

# Zoom in on the low-count part of the y axis.
# coord_cartesian() zooms the view without touching the data. Setting the limits
# through scale_y_continuous() instead turns every bar taller than 500 into a
# missing value and removes it from the plot (ggplot2 warns "Removed ... rows"),
# which hides exactly the peaks we are zooming past.
data.frame(Seq_Length = nchar(getSequences(raw_ASV_table_trimmed2))) %>%
  ggplot(aes(x = Seq_Length)) +
  geom_histogram() +
  labs(title = "Trimmed distribution of ASV length") +
  coord_cartesian(ylim = c(0, 500))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

I will keep these ASVs and potentially get more stringent later if it seems logical to do so.

Remove Chimeras

Sometimes chimeras arise in our workflow.

Chimeric sequences are artificial sequences formed by the combination of two or more distinct biological sequences. These chimeric sequences can arise during the polymerase chain reaction (PCR) amplification step of the 16S rRNA gene, where fragments from different templates can be erroneously joined together.

Chimera removal is an essential step in the analysis of 16S sequencing data to improve the accuracy of downstream analyses, such as taxonomic assignment and diversity assessment. It helps to avoid the inclusion of misleading or spurious sequences that could lead to incorrect biological interpretations.

# Drop chimeric ASVs (reported by dada2 as "bimeras") from the length-trimmed
# ASV table, using the "consensus" method.
noChimeras_ASV_table2 <- removeBimeraDenovo(
  unqs = raw_ASV_table_trimmed2,
  method = "consensus",
  multithread = TRUE,
  verbose = TRUE
)
## Identified 113004 bimeras out of 169576 input sequences.
# NOTE(review): roughly two thirds of the ASVs and ~29% of the reads are removed
# here; it may be worth confirming that primers were removed upstream - TODO confirm.
# Dimensions of the chimera-free table
dim(noChimeras_ASV_table2)
## [1]    59 56572
# Fraction of read counts kept, relative to the length-trimmed table ...
sum(noChimeras_ASV_table2) / sum(raw_ASV_table_trimmed2)
## [1] 0.7083524
# ... and relative to the raw (untrimmed) table
sum(noChimeras_ASV_table2) / sum(raw_ASV_table2)
## [1] 0.7083287
# Histogram of ASV sequence lengths after trimming and chimera removal
ggplot(
  data = data.frame(
    Seq_Length_NoChim2 = nchar(getSequences(noChimeras_ASV_table2))
  ),
  mapping = aes(x = Seq_Length_NoChim2)
) +
  geom_histogram() +
  labs(title = "Trimmed + Chimera Removal distribution of ASV length")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

We retained ~71% of the reads after removing chimeras.

Track the read counts

Here, we will look at the number of reads that were lost in the filtering, denoising, merging, and chimera removal.

# Helper: total number of reads represented by an object, computed as the sum
# of the per-unique-sequence abundances returned by getUniques().
getN2 <- function(x) sum(getUniques(x))

# Per-sample read counts at each pipeline stage, one column per stage:
# the reads.in / reads.out columns of filtered_reads2, then denoised forward,
# denoised reverse, merged, and non-chimeric counts.
# vapply() (rather than sapply()) guarantees one numeric value per sample and
# fails loudly if any stage returns something of a different shape, instead of
# silently handing cbind() a list.
track2 <- cbind(
  filtered_reads2,
  vapply(dada_forward2, getN2, numeric(1)),
  vapply(dada_reverse2, getN2, numeric(1)),
  vapply(merged_ASVs2, getN2, numeric(1)),
  rowSums(noChimeras_ASV_table2)
)

head(track2)
##                                                                       reads.in
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz    204642
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz    175656
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz   138589
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz   202865
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz   202611
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz   188583
##                                                                       reads.out
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz     112436
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz     109166
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz     77962
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz    119856
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz    123024
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz    117249
##                                                                             
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz  101452
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz  102127
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz  71014
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 107161
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 109470
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 107939
##                                                                             
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz  102760
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz  103285
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz  71718
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 109968
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 112127
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 109250
##                                                                            
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz  49650
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz  70513
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 39986
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 56269
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 54587
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 66448
##                                                                            
## 13915_11671_187921_KTY92_Plate1_A06_B4_TTCCAAGG_CCTTGTAG_R1.fastq.gz  40089
## 13915_11671_187921_KTY92_Plate1_A07_G2_CGCATGAT_TCAGGCTT_R1.fastq.gz  48534
## 13915_11671_187921_KTY92_Plate1_A08_G10_ACGGAACA_GTTCTCGT_R1.fastq.gz 29166
## 13915_11671_187921_KTY92_Plate1_A09_G18_CGGCTAAT_AGAACGAG_R1.fastq.gz 43634
## 13915_11671_187921_KTY92_Plate1_A10_G26_ATCGATCG_TGCTTCCA_R1.fastq.gz 43943
## 13915_11671_187921_KTY92_Plate1_A11_G34_GCAAGATC_CTTCGACT_R1.fastq.gz 51221
# Label the tracking matrix: rows are samples, columns are pipeline stages
# (most column names are empty straight out of cbind()).
rownames(track2) <- samples2
colnames(track2) <- c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")

# Data frame version of the tracking table, with the sample name as a column
# and the percentage of input reads that survive to the final (nochim) stage.
track_read_counts2 <- mutate(
  rownames_to_column(as.data.frame(track2), var = "names"),
  perc_reads_retained = 100 * nochim / input
)

# Interactive table view
DT::datatable(track_read_counts2)
# One line per sample showing how its read count changes stage by stage
track_read_counts2 %>%
  pivot_longer(
    cols = input:nochim,
    names_to = "read_type",
    values_to = "num_reads"
  ) %>%
  # order the x axis by pipeline stage rather than alphabetically
  mutate(
    read_type = factor(
      read_type,
      levels = c("input", "filtered", "denoisedF", "denoisedR", "merged", "nochim")
    )
  ) %>%
  ggplot(aes(x = read_type, y = num_reads, fill = read_type)) +
  geom_line(aes(group = names), color = "grey") +
  geom_point(shape = 21, size = 3, alpha = 0.8) +
  scale_fill_brewer(palette = "Spectral") +
  labs(x = "Filtering Step", y = "Number of Sequences") +
  theme_bw()

There is quite a drastic drop from raw to filtered, trimmed, no chimeras data, but it seems like the sequencing depth was enough that this will still be acceptable.

Assign Taxonomy

Here, we will use the SILVA database, version 138.1!

# Assign taxonomy to the chimera-free ASVs against the SILVA nr99 v138.1
# training set (RDP Naive Bayesian Classifier, Wang et al. 2007, AEM).
taxa_train2 <- assignTaxonomy(
  seqs = noChimeras_ASV_table2,
  refFasta = "/workdir/in_class_data/taxonomy/silva_nr99_v138.1_train_set.fa.gz",
  multithread = TRUE
)

# Extend the assignments with genus/species information from the SILVA
# species-assignment reference.
taxa_addSpecies2 <- addSpecies(
  taxtab = taxa_train2,
  refFasta = "/workdir/in_class_data/taxonomy/silva_species_assignment_v138.1.fa.gz"
)

# Display-only copy without the (very long) sequence rownames
taxa_print2 <- taxa_addSpecies2
rownames(taxa_print2) <- NULL
#View(taxa_print2)

Prepare the data for export!

1. ASV Table

Below, we will prepare the following:

  1. Two ASV Count tables:
    1. With ASV seqs: ASV headers include the entire ASV sequence (~400–430 bp for this dataset).
    2. with ASV names: This includes re-written and shortened headers like ASV_1, ASV_2, etc, which will match the names in our fasta file below.
  2. ASV_fastas: A fasta file that we can use to build a tree for phylogenetic analyses (e.g. phylogenetic alpha diversity metrics or UniFrac dissimilarity).

Finalize ASV Count Tables

########### 2. COUNT TABLE ###############
############## Shorten the ASV names, then save a fasta file ##############
# The column names of the chimera-free table are the full ASV sequences;
# pull them out so they can be paired with short ASV_<n> names below.
asv_seqs2 <- dimnames(noChimeras_ASV_table2)[[2]]
asv_seqs2[seq_len(5)]
## [1] "GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA"                         
## [2] "GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA"
## [3] "GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA"                    
## [4] "GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA"                    
## [5] "GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA"
# Build the fasta headers (">ASV_1", ">ASV_2", ...), one per ASV column of the
# chimera-free table; these double as the short ASV names.
# sprintf() is vectorised, so no preallocation or loop is needed, and seq_len()
# yields an empty sequence (rather than c(1, 0)) for a table with no columns.
asv_headers2 <- sprintf(">ASV_%d", seq_len(ncol(noChimeras_ASV_table2)))

# intuition check
asv_headers2[1:5]
## [1] ">ASV_1" ">ASV_2" ">ASV_3" ">ASV_4" ">ASV_5"
##### Build the ASV-by-sample count table with short ASV names
#View(noChimeras_ASV_table2)
# Transpose so that ASVs become rows and samples become columns
asv_tab2 <- t(noChimeras_ASV_table2)
#View(asv_tab2)

## Replace the sequence row names with the headers, minus the fasta ">" prefix
rownames(asv_tab2) <- sub(">", "", asv_headers2, fixed = TRUE)
#View(asv_tab2)

2. Taxonomy Table

# Look at the taxonomy table
#View(taxa_addSpecies2)

##### Prepare tax table
# Convert the taxonomy matrix to a data frame and move its rownames
# (the ASV sequences) into a regular column called ASVseqs
new_tax_tab2 <- rownames_to_column(
  as.data.frame(taxa_addSpecies2),
  var = "ASVseqs"
)
head(new_tax_tab2)
##                                                                                                                                                                                                                                                                                                                                                                                                                                      ASVseqs
## 1                          GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## 2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## 3                     GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 4                     GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## 5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## 6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
##    Kingdom           Phylum               Class            Order
## 1 Bacteria       Firmicutes          Clostridia    Clostridiales
## 2 Bacteria   Proteobacteria Gammaproteobacteria Enterobacterales
## 3 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## 4 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## 5 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
## 6 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
##             Family                        Genus Species
## 1   Clostridiaceae Clostridium sensu stricto 13    <NA>
## 2   Aeromonadaceae                    Aeromonas    <NA>
## 3   Micrococcaceae            Pseudarthrobacter    <NA>
## 4   Micrococcaceae            Pseudarthrobacter    <NA>
## 5 Pseudomonadaceae                  Pseudomonas    <NA>
## 6 Pseudomonadaceae                  Pseudomonas    <NA>
# Intuition check: the taxonomy rows must be the same sequences, in the same
# order, as the columns of the chimera-free count table.
# identical() also catches a length mismatch, which an element-wise `==` could
# let through via silent recycling.
stopifnot(identical(new_tax_tab2$ASVseqs, colnames(noChimeras_ASV_table2)))

# Now let's add the short ASV names (ASV_1, ASV_2, ...) as rownames
rownames(new_tax_tab2) <- rownames(asv_tab2)
head(new_tax_tab2)
##                                                                                                                                                                                                                                                                                                                                                                                                                                          ASVseqs
## ASV_1                          GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## ASV_2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_3                     GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_4                     GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
##        Kingdom           Phylum               Class            Order
## ASV_1 Bacteria       Firmicutes          Clostridia    Clostridiales
## ASV_2 Bacteria   Proteobacteria Gammaproteobacteria Enterobacterales
## ASV_3 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## ASV_4 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## ASV_5 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV_6 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
##                 Family                        Genus Species
## ASV_1   Clostridiaceae Clostridium sensu stricto 13    <NA>
## ASV_2   Aeromonadaceae                    Aeromonas    <NA>
## ASV_3   Micrococcaceae            Pseudarthrobacter    <NA>
## ASV_4   Micrococcaceae            Pseudarthrobacter    <NA>
## ASV_5 Pseudomonadaceae                  Pseudomonas    <NA>
## ASV_6 Pseudomonadaceae                  Pseudomonas    <NA>
### Final taxonomy table: add a column holding the short ASV names
# The ASV column repeats the count-table rownames for the phyloseq handoff;
# select() then puts the taxonomic ranks first, followed by ASV and ASVseqs.
asv_tax2 <- dplyr::select(
  mutate(new_tax_tab2, ASV = rownames(asv_tab2)),
  Kingdom, Phylum, Class, Order, Family, Genus, Species, ASV, ASVseqs
)

head(asv_tax2)
##        Kingdom           Phylum               Class            Order
## ASV_1 Bacteria       Firmicutes          Clostridia    Clostridiales
## ASV_2 Bacteria   Proteobacteria Gammaproteobacteria Enterobacterales
## ASV_3 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## ASV_4 Bacteria Actinobacteriota      Actinobacteria    Micrococcales
## ASV_5 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
## ASV_6 Bacteria   Proteobacteria Gammaproteobacteria  Pseudomonadales
##                 Family                        Genus Species   ASV
## ASV_1   Clostridiaceae Clostridium sensu stricto 13    <NA> ASV_1
## ASV_2   Aeromonadaceae                    Aeromonas    <NA> ASV_2
## ASV_3   Micrococcaceae            Pseudarthrobacter    <NA> ASV_3
## ASV_4   Micrococcaceae            Pseudarthrobacter    <NA> ASV_4
## ASV_5 Pseudomonadaceae                  Pseudomonas    <NA> ASV_5
## ASV_6 Pseudomonadaceae                  Pseudomonas    <NA> ASV_6
##                                                                                                                                                                                                                                                                                                                                                                                                                                          ASVseqs
## ASV_1                          GGGGAATATTGCGCAATGGGGGAAACCCTGACGCAGCAACGCCGCGTGAATGATGAAGGCCTTCGGGTTGTAAAGTTCTGTCTTCTGGGACGATAATGACGGTACCAGAGGAGGAAGCCACGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTACTGGGCGTAAAGGATGCGTAGGCGGACATTTAAGTCAGATGTGAAATACCCGAGCTTAACTTGGGTGCTGCATTTGAAACTGGGTGTCTAGAGTGCAGGAGAGGTAAGTGGAATTCCTAGTGTAGCGGTGAAATGCGTAGAGATTAGGAAGAACACCAGTGGCGAAGGCGACTTACTGGACTGTAACTGACGCTGAGGCATGAAAGCGTGGGGAGCAAACA
## ASV_2 GGGGAATATTGCACAATGGGGGAAACCCTGATGCAGCCATGCCGCGTGTGTGAAGAAGGCCTTCGGGTTGTAAAGCACTTTCAGCGAGGAGGAAAGGTTGATGCTTAATACGTATCAACTGTGACGTTACTCGCAGAAGAAGCACCGGCTAACTCCGTGCCAGCAGCCGCGGTAATACGGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCACGCAGGCGGTTGGATAAGTTAGATGTGAAAGCCCCGGGCTCAACCTGGGAATTGCATTTAAAACTGTCCAGCTAGAGTCTTGTAGAGGGGGGTAGAATTCCAGGTGTAGCGGTGAAATGCGTAGAGATCTGGAGGAATACCGGTGGCGAAGGCGGCCCCCTGGACAAAGACTGACGCTCAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_3                     GGGGAATATTGCACAATGGGCGAAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_4                     GGGGAATATTGCACAATGGGCGCAAGCCTGATGCAGCGACGCCGCGTGAGGGATGACGGCCTTCGGGTTGTAAACCTCTTTCAGTAGGGAAGAAGCGAAAGTGACGGTACCTGCAGAAGAAGCGCCGGCTAACTACGTGCCAGCAGCCGCGGTAATACGTAGGGCGCAAGCGTTATCCGGAATTATTGGGCGTAAAGAGCTCGTAGGCGGTTTGTCGCGTCTGCCGTGAAAGTCCGGGGCTCAACTCCGGATCTGCGGTGGGTACGGGCAGACTAGAGTGATGTAGGGGAGACTGGAATTCCTGGTGTAGCGGTGAAATGCGCAGATATCAGGAGGAACACCGATGGCGAAGGCAGGTCTCTGGGCATTAACTGACGCTGAGGAGCGAAAGCATGGGGAGCGAACA
## ASV_5 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTTACCTAATACGTAAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTCGTTAAGTTGGATGTGAAATCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGTCGAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
## ASV_6 GGGGAATATTGGACAATGGGCGAAAGCCTGATCCAGCCATGCCGCGTGTGTGAAGAAGGTCTTCGGATTGTAAAGCACTTTAAGTTGGGAGGAAGGGCATTAACCTAATACGTTAGTGTTTTGACGTTACCGACAGAATAAGCACCGGCTAACTCTGTGCCAGCAGCCGCGGTAATACAGAGGGTGCAAGCGTTAATCGGAATTACTGGGCGTAAAGCGCGCGTAGGTGGTTTGTTAAGTTGGATGTGAAAGCCCCGGGCTCAACCTGGGAACTGCATTCAAAACTGACAAGCTAGAGTATGGTAGAGGGTGGTGGAATTTCCTGTGTAGCGGTGAAATGCGTAGATATAGGAAGGAACACCAGTGGCGAAGGCGACCACCTGGACTGATACTGACACTGAGGTGCGAAAGCGTGGGGAGCAAACA
# Intuition check: the ASV column, the taxonomy rownames and the count-table
# rownames must all be the same names in the same order.
# identical() also fails on a length mismatch, which `==` could hide by recycling.
stopifnot(
  identical(asv_tax2$ASV, rownames(asv_tax2)),
  identical(rownames(asv_tax2), rownames(asv_tab2))
)

Write 01_DADA2 files

Now, we will write the files! We will write the following to the data/01_DADA2/ folder. We will save both as files that could be submitted as supplements AND as .RData objects for easy loading into the next steps into R.:

  1. ASV_counts2.tsv: ASV count table that has ASV names that are re-written and shortened headers like ASV_1, ASV_2, etc, which will match the names in our fasta file below. This will also be saved as data/01_DADA2/ASV_counts2.RData.
  2. ASV_counts_withSeqNames2.tsv: This is generated with the data object in this file known as noChimeras_ASV_table2. ASV headers include the entire ASV sequence (~400–430 bp). In addition, we will save this as a .RData object as data/01_DADA2/noChimeras_ASV_table2.RData as we will use this data in analysis/02_Taxonomic_Assignment.Rmd to assign the taxonomy from the sequence headers.
  3. ASVs.fasta2: A fasta file output of the ASV names from ASV_counts2.tsv and the sequences from the ASVs in ASV_counts_withSeqNames2.tsv. A fasta file that we can use to build a tree for phylogenetic analyses (e.g. phylogenetic alpha diversity metrics or UniFrac dissimilarity).
  4. We will also make a copy of ASVs.fasta in data/02_TaxAss_FreshTrain/ to be used for the taxonomy classification in the next step in the workflow.
  5. Write out the taxonomy table
  6. track_read_counts.RData: To track how many reads we lost throughout our workflow that could be used and plotted later. We will add this to the metadata in analysis/02_Taxonomic_Assignment.Rmd.
# ---- 1. Plain-text outputs ----
# Count table keyed by short ASV names (ASV_1, ASV_2, ...)
write.table(
  asv_tab2,
  file = "data/01_DADA2/ASV_counts2.tsv",
  quote = FALSE, sep = "\t", col.names = NA
)
# Count table keyed by the full ASV sequences
write.table(
  noChimeras_ASV_table2,
  file = "data/01_DADA2/ASV_counts_withSeqNames2.tsv",
  quote = FALSE, sep = "\t", col.names = NA
)
# Fasta: rbind() stacks headers over sequences, and flattening the matrix
# column by column interleaves them as header, sequence, header, sequence, ...
asv_fasta2 <- as.vector(rbind(asv_headers2, asv_seqs2))
# NOTE(review): the file name ends in ".fasta2" rather than ".fasta" - confirm
# this is what downstream steps expect before changing it.
writeLines(asv_fasta2, "data/01_DADA2/ASVs.fasta2")

# ---- 2. Taxonomy table ----
write.table(
  asv_tax2,
  file = "data/01_DADA2/ASV_taxonomy2.tsv",
  quote = FALSE, sep = "\t", col.names = NA
)

# ---- 3. RData objects for easy loading in analysis/02_Taxonomic_Assignment ----
save(noChimeras_ASV_table2, file = "data/01_DADA2/noChimeras_ASV_table2.RData")
save(asv_tab2, file = "data/01_DADA2/ASV_counts2.RData")
# Read-tracking table, to be merged with the metadata in the next step of the
# analysis in analysis/02_Taxonomic_Assignment.
save(track_read_counts2, file = "data/01_DADA2/track_read_counts2.RData")

Session Information

# Ensure reproducibility: print the session settings (R version, OS, locale,
# date) and the version and source of every loaded package for this run.
devtools::session_info()
## ─ Session info ───────────────────────────────────────────────────────────────
##  setting  value
##  version  R version 4.3.2 (2023-10-31)
##  os       Rocky Linux 9.0 (Blue Onyx)
##  system   x86_64, linux-gnu
##  ui       X11
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/New_York
##  date     2024-05-02
##  pandoc   3.1.1 @ /usr/lib/rstudio-server/bin/quarto/bin/tools/ (via rmarkdown)
## 
## ─ Packages ───────────────────────────────────────────────────────────────────
##  package              * version    date (UTC) lib source
##  abind                  1.4-5      2016-07-21 [2] CRAN (R 4.3.2)
##  ade4                   1.7-22     2023-02-06 [1] CRAN (R 4.3.2)
##  ape                    5.7-1      2023-03-13 [2] CRAN (R 4.3.2)
##  Biobase                2.62.0     2023-10-24 [2] Bioconductor
##  BiocGenerics           0.48.1     2023-11-01 [2] Bioconductor
##  BiocManager          * 1.30.22    2023-08-08 [2] CRAN (R 4.3.2)
##  BiocParallel           1.36.0     2023-10-24 [2] Bioconductor
##  biomformat             1.30.0     2023-10-24 [1] Bioconductor
##  Biostrings             2.70.1     2023-10-25 [2] Bioconductor
##  bitops                 1.0-7      2021-04-24 [2] CRAN (R 4.3.2)
##  bslib                  0.5.1      2023-08-11 [2] CRAN (R 4.3.2)
##  cachem                 1.0.8      2023-05-01 [2] CRAN (R 4.3.2)
##  callr                  3.7.3      2022-11-02 [2] CRAN (R 4.3.2)
##  cli                    3.6.1      2023-03-23 [2] CRAN (R 4.3.2)
##  cluster                2.1.4      2022-08-22 [2] CRAN (R 4.3.2)
##  codetools              0.2-19     2023-02-01 [2] CRAN (R 4.3.2)
##  colorspace             2.1-0      2023-01-23 [2] CRAN (R 4.3.2)
##  crayon                 1.5.2      2022-09-29 [2] CRAN (R 4.3.2)
##  crosstalk              1.2.0      2021-11-04 [2] CRAN (R 4.3.2)
##  dada2                * 1.30.0     2023-10-24 [1] Bioconductor
##  data.table             1.14.8     2023-02-17 [2] CRAN (R 4.3.2)
##  DelayedArray           0.28.0     2023-10-24 [2] Bioconductor
##  deldir                 1.0-9      2023-05-17 [2] CRAN (R 4.3.2)
##  devtools             * 2.4.5      2022-10-11 [1] CRAN (R 4.3.2)
##  digest                 0.6.33     2023-07-07 [2] CRAN (R 4.3.2)
##  dplyr                * 1.1.3      2023-09-03 [2] CRAN (R 4.3.2)
##  DT                   * 0.32       2024-02-19 [1] CRAN (R 4.3.2)
##  ellipsis               0.3.2      2021-04-29 [2] CRAN (R 4.3.2)
##  evaluate               0.23       2023-11-01 [2] CRAN (R 4.3.2)
##  fansi                  1.0.5      2023-10-08 [2] CRAN (R 4.3.2)
##  farver                 2.1.1      2022-07-06 [2] CRAN (R 4.3.2)
##  fastmap                1.1.1      2023-02-24 [2] CRAN (R 4.3.2)
##  forcats              * 1.0.0      2023-01-29 [1] CRAN (R 4.3.2)
##  foreach                1.5.2      2022-02-02 [2] CRAN (R 4.3.2)
##  fs                     1.6.3      2023-07-20 [2] CRAN (R 4.3.2)
##  generics               0.1.3      2022-07-05 [2] CRAN (R 4.3.2)
##  GenomeInfoDb           1.38.0     2023-10-24 [2] Bioconductor
##  GenomeInfoDbData       1.2.11     2023-11-07 [2] Bioconductor
##  GenomicAlignments      1.38.0     2023-10-24 [2] Bioconductor
##  GenomicRanges          1.54.1     2023-10-29 [2] Bioconductor
##  ggplot2              * 3.5.0      2024-02-23 [2] CRAN (R 4.3.2)
##  glue                   1.6.2      2022-02-24 [2] CRAN (R 4.3.2)
##  gtable                 0.3.4      2023-08-21 [2] CRAN (R 4.3.2)
##  highr                  0.10       2022-12-22 [2] CRAN (R 4.3.2)
##  hms                    1.1.3      2023-03-21 [1] CRAN (R 4.3.2)
##  htmltools              0.5.7      2023-11-03 [2] CRAN (R 4.3.2)
##  htmlwidgets            1.6.2      2023-03-17 [2] CRAN (R 4.3.2)
##  httpuv                 1.6.12     2023-10-23 [2] CRAN (R 4.3.2)
##  hwriter                1.3.2.1    2022-04-08 [1] CRAN (R 4.3.2)
##  igraph                 1.5.1      2023-08-10 [2] CRAN (R 4.3.2)
##  iNEXT                * 3.0.0      2022-08-29 [1] CRAN (R 4.3.2)
##  interp                 1.1-6      2024-01-26 [1] CRAN (R 4.3.2)
##  IRanges                2.36.0     2023-10-24 [2] Bioconductor
##  iterators              1.0.14     2022-02-05 [2] CRAN (R 4.3.2)
##  jpeg                   0.1-10     2022-11-29 [1] CRAN (R 4.3.2)
##  jquerylib              0.1.4      2021-04-26 [2] CRAN (R 4.3.2)
##  jsonlite               1.8.7      2023-06-29 [2] CRAN (R 4.3.2)
##  knitr                  1.45       2023-10-30 [2] CRAN (R 4.3.2)
##  labeling               0.4.3      2023-08-29 [2] CRAN (R 4.3.2)
##  later                  1.3.1      2023-05-02 [2] CRAN (R 4.3.2)
##  lattice              * 0.21-9     2023-10-01 [2] CRAN (R 4.3.2)
##  latticeExtra           0.6-30     2022-07-04 [1] CRAN (R 4.3.2)
##  lifecycle              1.0.3      2022-10-07 [2] CRAN (R 4.3.2)
##  lubridate            * 1.9.3      2023-09-27 [1] CRAN (R 4.3.2)
##  magrittr               2.0.3      2022-03-30 [2] CRAN (R 4.3.2)
##  MASS                   7.3-60     2023-05-04 [2] CRAN (R 4.3.2)
##  Matrix                 1.6-1.1    2023-09-18 [2] CRAN (R 4.3.2)
##  MatrixGenerics         1.14.0     2023-10-24 [2] Bioconductor
##  matrixStats            1.1.0      2023-11-07 [2] CRAN (R 4.3.2)
##  memoise                2.0.1      2021-11-26 [2] CRAN (R 4.3.2)
##  mgcv                   1.9-0      2023-07-11 [2] CRAN (R 4.3.2)
##  mime                   0.12       2021-09-28 [2] CRAN (R 4.3.2)
##  miniUI                 0.1.1.1    2018-05-18 [2] CRAN (R 4.3.2)
##  multtest               2.58.0     2023-10-24 [1] Bioconductor
##  munsell                0.5.0      2018-06-12 [2] CRAN (R 4.3.2)
##  nlme                   3.1-163    2023-08-09 [2] CRAN (R 4.3.2)
##  pacman               * 0.5.1      2019-03-11 [1] CRAN (R 4.3.2)
##  patchwork            * 1.2.0.9000 2024-03-13 [1] Github (thomasp85/patchwork@d943757)
##  permute              * 0.9-7      2022-01-27 [1] CRAN (R 4.3.2)
##  phyloseq             * 1.46.0     2023-10-24 [1] Bioconductor
##  pillar                 1.9.0      2023-03-22 [2] CRAN (R 4.3.2)
##  pkgbuild               1.4.2      2023-06-26 [2] CRAN (R 4.3.2)
##  pkgconfig              2.0.3      2019-09-22 [2] CRAN (R 4.3.2)
##  pkgload                1.3.3      2023-09-22 [2] CRAN (R 4.3.2)
##  plyr                   1.8.9      2023-10-02 [2] CRAN (R 4.3.2)
##  png                    0.1-8      2022-11-29 [2] CRAN (R 4.3.2)
##  prettyunits            1.2.0      2023-09-24 [2] CRAN (R 4.3.2)
##  processx               3.8.2      2023-06-30 [2] CRAN (R 4.3.2)
##  profvis                0.3.8      2023-05-02 [2] CRAN (R 4.3.2)
##  promises               1.2.1      2023-08-10 [2] CRAN (R 4.3.2)
##  ps                     1.7.5      2023-04-18 [2] CRAN (R 4.3.2)
##  purrr                * 1.0.2      2023-08-10 [2] CRAN (R 4.3.2)
##  R6                     2.5.1      2021-08-19 [2] CRAN (R 4.3.2)
##  RColorBrewer           1.1-3      2022-04-03 [2] CRAN (R 4.3.2)
##  Rcpp                 * 1.0.11     2023-07-06 [2] CRAN (R 4.3.2)
##  RcppParallel           5.1.7      2023-02-27 [2] CRAN (R 4.3.2)
##  RCurl                  1.98-1.13  2023-11-02 [2] CRAN (R 4.3.2)
##  readr                * 2.1.5      2024-01-10 [1] CRAN (R 4.3.2)
##  remotes                2.4.2.1    2023-07-18 [2] CRAN (R 4.3.2)
##  reshape2               1.4.4      2020-04-09 [2] CRAN (R 4.3.2)
##  rhdf5                  2.46.1     2023-11-29 [1] Bioconductor 3.18 (R 4.3.2)
##  rhdf5filters           1.14.1     2023-11-06 [1] Bioconductor
##  Rhdf5lib               1.24.2     2024-02-07 [1] Bioconductor 3.18 (R 4.3.2)
##  rlang                  1.1.2      2023-11-04 [2] CRAN (R 4.3.2)
##  rmarkdown              2.25       2023-09-18 [2] CRAN (R 4.3.2)
##  Rsamtools              2.18.0     2023-10-24 [2] Bioconductor
##  rstudioapi             0.15.0     2023-07-07 [2] CRAN (R 4.3.2)
##  S4Arrays               1.2.0      2023-10-24 [2] Bioconductor
##  S4Vectors              0.40.1     2023-10-26 [2] Bioconductor
##  sass                   0.4.7      2023-07-15 [2] CRAN (R 4.3.2)
##  scales                 1.3.0      2023-11-28 [2] CRAN (R 4.3.2)
##  sessioninfo            1.2.2      2021-12-06 [2] CRAN (R 4.3.2)
##  shiny                  1.7.5.1    2023-10-14 [2] CRAN (R 4.3.2)
##  ShortRead              1.60.0     2023-10-24 [1] Bioconductor
##  SparseArray            1.2.1      2023-11-05 [2] Bioconductor
##  stringi                1.7.12     2023-01-11 [2] CRAN (R 4.3.2)
##  stringr              * 1.5.0      2022-12-02 [2] CRAN (R 4.3.2)
##  SummarizedExperiment   1.32.0     2023-10-24 [2] Bioconductor
##  survival               3.5-7      2023-08-14 [2] CRAN (R 4.3.2)
##  tibble               * 3.2.1      2023-03-20 [2] CRAN (R 4.3.2)
##  tidyr                * 1.3.0      2023-01-24 [2] CRAN (R 4.3.2)
##  tidyselect             1.2.0      2022-10-10 [2] CRAN (R 4.3.2)
##  tidyverse            * 2.0.0      2023-02-22 [1] CRAN (R 4.3.2)
##  timechange             0.3.0      2024-01-18 [1] CRAN (R 4.3.2)
##  tzdb                   0.4.0      2023-05-12 [1] CRAN (R 4.3.2)
##  urlchecker             1.0.1      2021-11-30 [2] CRAN (R 4.3.2)
##  usethis              * 2.2.2      2023-07-06 [2] CRAN (R 4.3.2)
##  utf8                   1.2.4      2023-10-22 [2] CRAN (R 4.3.2)
##  vctrs                  0.6.4      2023-10-12 [2] CRAN (R 4.3.2)
##  vegan                * 2.6-4      2022-10-11 [1] CRAN (R 4.3.2)
##  withr                  2.5.2      2023-10-30 [2] CRAN (R 4.3.2)
##  xfun                   0.41       2023-11-01 [2] CRAN (R 4.3.2)
##  xtable                 1.8-4      2019-04-21 [2] CRAN (R 4.3.2)
##  XVector                0.42.0     2023-10-24 [2] Bioconductor
##  yaml                   2.3.7      2023-01-23 [2] CRAN (R 4.3.2)
##  zlibbioc               1.48.0     2023-10-24 [2] Bioconductor
## 
##  [1] /home/jmc753/R/x86_64-pc-linux-gnu-library/4.3
##  [2] /programs/R-4.3.2/library
## 
## ──────────────────────────────────────────────────────────────────────────────